From ab54a33695297972d578aa56ecebf17e6e1dd97f Mon Sep 17 00:00:00 2001 From: Tony Hirst Date: Mon, 1 Dec 2014 13:03:56 +0000 Subject: [PATCH 1/9] Usability hacks Add menu options to run script; add save to Drive --- converttomarkdown.gapps | 61 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/converttomarkdown.gapps b/converttomarkdown.gapps index b6344de..69ca856 100644 --- a/converttomarkdown.gapps +++ b/converttomarkdown.gapps @@ -1,13 +1,42 @@ +/** + * Creates a menu entry in the Google Docs UI when the document is opened. + * + * @param {object} e The event parameter for a simple onOpen trigger. To + * determine which authorization mode (ScriptApp.AuthMode) the trigger is + * running in, inspect e.authMode. + */ +function onOpen(e) { + DocumentApp.getUi().createAddonMenu() + .addItem('Email md', 'emailMarkDown') + .addItem('Save md', 'saveMarkDown') + .addToUi(); +} + +/** + * Runs when the add-on is installed. + * + * @param {object} e The event parameter for a simple onInstall trigger. To + * determine which authorization mode (ScriptApp.AuthMode) the trigger is + * running in, inspect e.authMode. (In practice, onInstall triggers always + * run in AuthMode.FULL, but onOpen triggers may be AuthMode.LIMITED or + * AuthMode.NONE.) + */ +function onInstall(e) { + onOpen(e); +} + /* Usage: Adding this script to your doc: - - Tools > Script Manager > New - - Select "Blank Project", then paste this code in and save. + - Tools > Script Editor > New + - Select "Blank Project", then paste this code in and save eg as "md exporter" + - Reload the original doc Running the script: - - Tools > Script Manager - - Select "ConvertToMarkdown" function. - - Click Run button. - - Converted doc will be mailed to you. Subject will be "[MARKDOWN_MAKER]...". 
+ - In the Add-Ons menu, find the new menu eg "md exporter" + - Select "Email md" to email markdown doc and saved images to yourself + -- Converted doc will be mailed to you. Subject will be "[MARKDOWN_MAKER]..." + - Select "Save md" to save markdown doc and images to your Google Drive + -- A new folder with same name as original doc will be created containing the files */ function ConvertToMarkdown() { @@ -70,6 +99,11 @@ function ConvertToMarkdown() { attachments.push({"fileName":DocumentApp.getActiveDocument().getName()+".md", "mimeType": "text/plain", "content": text}); + return attachments +} + +function emailMarkDown(){ + attachments=ConvertToMarkdown() MailApp.sendEmail(Session.getActiveUser().getEmail(), "[MARKDOWN_MAKER] "+DocumentApp.getActiveDocument().getName(), "Your converted markdown document is attached (converted from "+DocumentApp.getActiveDocument().getUrl()+")"+ @@ -77,6 +111,19 @@ function ConvertToMarkdown() { { "attachments": attachments }); } +function saveMarkDown(){ + attachments=ConvertToMarkdown() + folder=DocumentApp.getActiveDocument().getName() + fid=DriveApp.createFolder(folder) + for (var i=0; i/g, '>'); } @@ -286,4 +333,4 @@ function processTextElement(inSrc, txt) { lastOff=off; } return pOut; -} +} \ No newline at end of file From 035e10f731fc8e0338d54b6821af45e8a19276fa Mon Sep 17 00:00:00 2001 From: Tony Hirst Date: Mon, 1 Dec 2014 13:13:06 +0000 Subject: [PATCH 2/9] Update documentation --- README.md | 30 +++++++++++++++++------------- converttomarkdown.gapps | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index fba7771..fde038f 100644 --- a/README.md +++ b/README.md @@ -4,19 +4,21 @@ gdocs2md A simple Google Apps script to convert a properly formatted Google Drive Document to the markdown (.md) format. 
## Usage - - * Adding this script to your doc (once per doc): - * Open your Google Drive document (http://drive.google.com) - * Tools -> Script Manager > New - * Select "Blank Project", then paste this code in and save. - * Clear the myFunction() default empty function and paste the contents of [converttomarkdown.gapps](https://raw.github.com/mangini/gdocs2md/master/converttomarkdown.gapps) into the code editor - * File -> Save + + * Adding this script to your doc: + * Tools > Script Editor > New + * Select "Blank Project" + * Clear the example code and paste in the contents of [converttomarkdown.gapps](https://raw.github.com/psychemedia/gdocs2md/master/converttomarkdown.gapps) + * Save with an appropriate project name, eg *md exporter* + * Reload the original doc - * Running the script (run as many times as you want): - - Tools > Script Manager - - Select "ConvertToMarkdown" function. - - Click Run button (First run will require you to authorize it. Authorize and run again) - - Converted doc with images attached will be emailed to you. Subject will be "[MARKDOWN_MAKER]...". + * Running the script (first run of each function will require you to authorize it): + * In the Add-Ons menu, find the new menu eg "md exporter" + * Select "Email md" to email markdown doc and saved images to yourself + * Converted doc with images attached will be mailed to you. Subject will be "[MARKDOWN_MAKER]..." 
+ * Select "Save md" to save markdown doc and images to your Google Drive + * A new folder with same name as original doc will be created containing the markdown and image files + ## Interpreted formats @@ -47,6 +49,8 @@ A simple Google Apps script to convert a properly formatted Google Drive Documen * Renato Mangini - [G+](//google.com/+renatomangini) - [Github](//github.com/mangini) * Ed Bacher - [G+](//plus.google.com/106923847899206957842) - [Github](//github.com/evbacher) +* Tony Hirst - [Github](//github.com/psychemedia) + ## LICENSE Use this script at your will, on any document you want and for any purpose, commercial or not. @@ -57,7 +61,7 @@ If you want to modify and redistribute the script (not the converted documents - just keep a reference to this repo or to the license info below: ``` -Copyright 2013 Google Inc. All Rights Reserved. +Original content: Copyright 2013 Google Inc. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/converttomarkdown.gapps b/converttomarkdown.gapps index 69ca856..270e1e7 100644 --- a/converttomarkdown.gapps +++ b/converttomarkdown.gapps @@ -31,7 +31,7 @@ Usage: - Tools > Script Editor > New - Select "Blank Project", then paste this code in and save eg as "md exporter" - Reload the original doc - Running the script: + Running the script (first run of each function will require you to authorize it): - In the Add-Ons menu, find the new menu eg "md exporter" - Select "Email md" to email markdown doc and saved images to yourself -- Converted doc will be mailed to you. Subject will be "[MARKDOWN_MAKER]..." 
From c7a788c822f3ec1085a38d6884226ae0fd8ebfe5 Mon Sep 17 00:00:00 2001 From: Tony Hirst Date: Mon, 1 Dec 2014 14:39:11 +0000 Subject: [PATCH 3/9] Add lack of support for INLINE_DRAWING --- converttomarkdown.gapps | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/converttomarkdown.gapps b/converttomarkdown.gapps index 270e1e7..f48d9f9 100644 --- a/converttomarkdown.gapps +++ b/converttomarkdown.gapps @@ -166,7 +166,10 @@ function processParagraph(index, element, inSrc, imageCounter, listCounters) { for (var i = 0; i < element.getNumChildren(); i++) { var t=element.getChild(i).getType(); - if (t === DocumentApp.ElementType.TABLE_ROW) { + if (t === DocumentApp.ElementType.INLINE_DRAWING) { + // do nothing substantive: no export as image available for now? + textElements.push('\nINLINE_DRAWING - UNSUPPORTED EXPORT\n'); + } else if (t === DocumentApp.ElementType.TABLE_ROW) { // do nothing: already handled TABLE_ROW } else if (t === DocumentApp.ElementType.TEXT) { var txt=element.getChild(i); From e73dfc80a37a422161d63ec4b09cf973d769bf0f Mon Sep 17 00:00:00 2001 From: Tony Hirst Date: Tue, 2 Dec 2014 18:37:00 +0000 Subject: [PATCH 4/9] First attempt at OUXML export --- converttoOUXML.gapps | 505 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 505 insertions(+) create mode 100644 converttoOUXML.gapps diff --git a/converttoOUXML.gapps b/converttoOUXML.gapps new file mode 100644 index 0000000..b0ca58c --- /dev/null +++ b/converttoOUXML.gapps @@ -0,0 +1,505 @@ +/** + * Creates a menu entry in the Google Docs UI when the document is opened. + * + * @param {object} e The event parameter for a simple onOpen trigger. To + * determine which authorization mode (ScriptApp.AuthMode) the trigger is + * running in, inspect e.authMode. + */ +function onOpen(e) { + DocumentApp.getUi().createAddonMenu() + .addItem('Email OUXML', 'emailOUXML') + .addItem('Save OUXML', 'saveOUXML') + .addToUi(); +} + +/** + * Runs when the add-on is installed. 
+ * + * @param {object} e The event parameter for a simple onInstall trigger. To + * determine which authorization mode (ScriptApp.AuthMode) the trigger is + * running in, inspect e.authMode. (In practice, onInstall triggers always + * run in AuthMode.FULL, but onOpen triggers may be AuthMode.LIMITED or + * AuthMode.NONE.) + */ +function onInstall(e) { + onOpen(e); +} + +/* +Usage: + Adding this script to your doc: + - Tools > Script Editor > New + - Select "Blank Project", then paste this code in and save eg as "md exporter" + - Reload the original doc + Running the script (first run of each function will require you to authorize it): + - In the Add-Ons menu, find the new menu eg "md exporter" + - Select "Email md" to email markdown doc and saved images to yourself + -- Converted doc will be mailed to you. Subject will be "[MARKDOWN_MAKER]..." + - Select "Save md" to save markdown doc and images to your Google Drive + -- A new folder with same name as original doc will be created containing the files +*/ + +HEADER='\n' +FOOTER='\n' + +function ConvertToOUXML() { + var numChildren = DocumentApp.getActiveDocument().getActiveSection().getNumChildren(); + var text = HEADER + '\n\n\n\n\n'; + text+="\n\n"+DocumentApp.getActiveDocument().getName()+"\n\n" + var inSrc = false; + var inClass = false; + var globalImageCounter = 0; + var globalListCounters = {}; + // edbacher: added a variable for indent in src
 block. Let style sheet do margin.
+  var srcIndent = "";
+  
+  var attachments = [];
+  
+  // Walk through all the child elements of the doc.
+  var inlist= false;
+  var inSublist = false;
+  var inSession=false;
+  var inSection=false;
+  var inSubSection=false;
+  for (var i = 0; i < numChildren; i++) {
+    var child = DocumentApp.getActiveDocument().getActiveSection().getChild(i);
+    var result = processParagraph(i, child, inSrc, globalImageCounter, globalListCounters);
+    
+    if (child.getType()===DocumentApp.ElementType.PARAGRAPH) {
+      var heading=child.getHeading();
+      //Crappy default hierarchy to try to organise Session/Section/SubSection
+      //Partial attempt at making elements robust
+      if (heading===DocumentApp.ParagraphHeading.HEADING3) {
+        if (inSubSection) text=text+"\n";
+        if (!(inSession)) {
+          text=text+"\n\n";
+          inSession=true;
+        }
+        if (!(inSection)) {
+          text=text+"
\n\n"; + inSection=true; + } + text=text+"\n"; + inSubSection=true; + } else if (heading===DocumentApp.ParagraphHeading.HEADING2) { + if (inSubSection) { + text=text+"\n
\n"; + inSubSection=false; + } else if (inSection) text=text+"\n"; + if (!(inSession)) { + text=text+"\n\n"; + inSession=true; + } + text=text+"
\n"; + inSection=true; + } else if (heading===DocumentApp.ParagraphHeading.HEADING1) { + if (inSubSection) { + text=text+"\n
\n
\n"; + inSubSection=false; + inSection=false; + } else if (inSection) { + text=text+"\n
\n"; + inSection=false; + } else if (inSession) text=text+"\n"; + text=text+"\n"; + inSession=true; + } + } + if (child.getType()===DocumentApp.ElementType.LIST_ITEM) { + inlist=true; + var listItem = child; + var nesting = listItem.getNestingLevel() + + var key = listItem.getListId()+ '.' + listItem.getNestingLevel(); + var counter = globalListCounters[key] || 0; + counter++; + globalListCounters[key] = counter; + //counter===1 says we're seeing a list for the first time + if (counter===1) { + if (nesting===0) { + if (inSublist){ + text=text + '\n\n\n'; + inSublist=false; + } + text=text+"\n"; + } else if (nesting===1) { + if (inlist) { + text=text.replace(/<\/ListItem>\s*$/, '\n'); + } else { + text=text+"\n\n"; + inList=true; + } + text=text+""; + inSublist= true; + } else throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported nesting level of " +nesting; + } else { + //We're in a list we know about... + if (nesting===0) { + if (inSublist){ + text=text + '\n\n\n'; + inSublist=false; + } + } else if (nesting===1){ + if (!(inSublist)){ + text=text.replace(/<\/ListItem>\s*$/, '\n'); + text=text+""; + inSublist=true; + } + } + } + } else if ( (inlist) || (inSublist) ) { + //We've hit a non-list item but we're in a list... So make good... + if (inSublist) text=text + '\n\n\n'; + text=text + '\n\n\n'; + inlist=false; + inSublist=false; + } + + + globalImageCounter += (result && result.images) ? result.images.length : 0; + if (result!==null) { + if (result.sourcePretty==="start" && !inSrc) { + inSrc=true; + text+="
\n";
+      } else if (result.sourcePretty==="end" && inSrc) {
+        inSrc=false;
+        text+="
\n\n"; + } else if (result.source==="start" && !inSrc) { + inSrc=true; + text+="
\n";
+      } else if (result.source==="end" && inSrc) {
+        inSrc=false;
+        text+="
\n\n"; + } else if (result.inClass==="start" && !inClass) { + inClass=true; + text+="
\n"; + } else if (result.inClass==="end" && inClass) { + inClass=false; + text+="
\n\n"; + } else if (inClass) { + text+=result.text+"\n\n"; + } else if (inSrc) { + text+=(srcIndent+escapeHTML(result.text)+"\n"); + } else if (result.text && result.text.length>0) { + text+=result.text+"\n\n"; + } + + if (result.images && result.images.length>0) { + for (var j=0; j\s*<\/Session>/g,'').replace(/
\s*<\/Section>/g,'').replace(/\s*<\/SubSection>/g,''); + + //replace ampersand + //text = text.replace(/&/g,"&").replace(/&/g,'&'); + + + attachments.push({"fileName":DocumentApp.getActiveDocument().getName()+".md", "mimeType": "text/plain", "content": text}); + + return attachments +} + +function emailOUXML(){ + attachments=ConvertToOUXML() + MailApp.sendEmail(Session.getActiveUser().getEmail(), + "[MARKDOWN_MAKER] "+DocumentApp.getActiveDocument().getName(), + "Your converted markdown document is attached (converted from "+DocumentApp.getActiveDocument().getUrl()+")"+ + "\n\nDon't know how to use the format options? See http://github.com/mangini/gdocs2md\n", + { "attachments": attachments }); +} + +function saveOUXML(){ + attachments=ConvertToOUXML() + folder=DocumentApp.getActiveDocument().getName() + fid=DriveApp.createFolder(folder) + for (var i=0; i/g, '>'); +} + +// Process each child element (not just paragraphs). +function processParagraph(index, element, inSrc, imageCounter, listCounters) { + // First, check for things that require no processing. + if (element.getNumChildren()==0) { + return null; + } + // Punt on TOC. + if (element.getType() === DocumentApp.ElementType.TABLE_OF_CONTENTS) { + return {"text": "[[TOC]]"}; + } + + // Set up for real results. + var result = {}; + var pOut = ""; + var textElements = []; + var imagePrefix = "image_"; + + // Handle Table elements. Pretty simple-minded now, but works for simple tables. + // Note that Markdown does not process within block-level HTML, so it probably + // doesn't make sense to add markup within tables. + if (element.getType() === DocumentApp.ElementType.TABLE) { + textElements.push("\n\n\n"); + var nCols = element.getChild(0).getNumCells(); + for (var i = 0; i < element.getNumChildren(); i++) { + textElements.push(" \n"); + // process this row + for (var j = 0; j < nCols; j++) { + textElements.push(" \n"); + } + textElements.push(" \n"); + } + textElements.push("\n
" + element.getChild(i).getChild(j).getText() + "
\n"); + } + + //Image must be in a para on its own. + //This is a really horrible fudge + var ic=0 + for (var i = 0; i < element.getNumChildren(); i++) { + var t=element.getChild(i).getType(); + if (t=== DocumentApp.ElementType.INLINE_IMAGE) { + ic++; + result.images = result.images || []; + var contentType = element.getChild(i).getBlob().getContentType(); + var extension = ""; + if (/\/png$/.test(contentType)) { + extension = ".png"; + } else if (/\/gif$/.test(contentType)) { + extension = ".gif"; + } else if (/\/jpe?g$/.test(contentType)) { + extension = ".jpg"; + } else { + throw "Unsupported image type: "+contentType; + } + var name = imagePrefix + imageCounter + extension; + imageCounter++; + textElements.push('
'); + result.images.push( { + "bytes": element.getChild(i).getBlob().getBytes(), + "type": contentType, + "name": name}); + var pOut = ""; + for (var j=0; j1) throw "Paragraph "+index+" of type "+element.getType()+" has multiple elements alongside an image"; + + + // Process various types (ElementType). + for (var i = 0; i < element.getNumChildren(); i++) { + var t=element.getChild(i).getType(); + + if (t === DocumentApp.ElementType.INLINE_DRAWING) { + // do nothing substantive: no export as image available for now? + textElements.push('\nINLINE_DRAWING - UNSUPPORTED EXPORT\n'); + } else if (t === DocumentApp.ElementType.TABLE_ROW) { + // do nothing: already handled TABLE_ROW + } else if (t === DocumentApp.ElementType.TEXT) { + var txt=element.getChild(i); + pOut += escapeHTML(txt.getText()); + textElements.push(txt); + } else if (t === DocumentApp.ElementType.PAGE_BREAK) { + // ignore + } else if (t === DocumentApp.ElementType.HORIZONTAL_RULE) { + textElements.push('* * *\n'); + } else if (t === DocumentApp.ElementType.FOOTNOTE) { + textElements.push(' (NOTE: '+element.getChild(i).getFootnoteContents().getText()+')'); + } else { + throw "Paragraph "+index+" of type "+element.getType()+" has an unsupported child: " + +t+" "+(element.getChild(i)["getText"] ? element.getChild(i).getText():'')+" index="+index; + } + } + + if (textElements.length==0) { + // Isn't result empty now? + return result; + } + + // evb: Add source pretty too. (And abbreviations: src and srcp.) 
+ // process source code block: + if (/^\s*---\s+srcp\s*$/.test(pOut) || /^\s*---\s+source pretty\s*$/.test(pOut)) { + result.sourcePretty = "start"; + } else if (/^\s*---\s+src\s*$/.test(pOut) || /^\s*---\s+source code\s*$/.test(pOut)) { + result.source = "start"; + } else if (/^\s*---\s+class\s+([^ ]+)\s*$/.test(pOut)) { + result.inClass = "start"; + result.className = RegExp.$1; + } else if (/^\s*---\s*$/.test(pOut)) { + result.source = "end"; + result.sourcePretty = "end"; + result.inClass = "end"; + } else if (/^\s*---\s+jsperf\s*([^ ]+)\s*$/.test(pOut)) { + result.text = ''; + } else { + + prefix = findPrefix(inSrc, element, listCounters); + + var pOut = ""; + for (var i=0; i");prefix.push("");break; + case DocumentApp.ParagraphHeading.HEADING5: prefix.push("");prefix.push("");break; + case DocumentApp.ParagraphHeading.HEADING4: prefix.push("");prefix.push("");break; + case DocumentApp.ParagraphHeading.HEADING3: prefix.push("");break; + case DocumentApp.ParagraphHeading.HEADING2: prefix.push("<Title>");break; + case DocumentApp.ParagraphHeading.HEADING1: prefix.push("<Title>");break; + default: prefix.push("<Paragraph>"); + } + } else if (element.getType()===DocumentApp.ElementType.LIST_ITEM) { + var listItem = element; + var nesting = listItem.getNestingLevel() + + if (nesting===0) prefix.push("<ListItem>") + else if (nesting===1) prefix.push("<SubListItem>") + /* + for (var i=0; i<nesting; i++) { + prefix += " "; + } + var gt = listItem.getGlyphType(); + // Bullet list (<ul>): + if (gt === DocumentApp.GlyphType.BULLET + || gt === DocumentApp.GlyphType.HOLLOW_BULLET + || gt === DocumentApp.GlyphType.SQUARE_BULLET) { + prefix += "* "; + } else { + // Ordered list (<ol>): + var key = listItem.getListId() + '.' + listItem.getNestingLevel(); + var counter = listCounters[key] || 0; + counter++; + listCounters[key] = counter; + prefix += counter+". 
"; + } + //*/ + } + } + return prefix; +} + +function processTextElement(inSrc, txt) { + if (typeof(txt) === 'string') { + return escapeAmp(txt); + } + + var pOut = txt.getText(); + if (! txt.getTextAttributeIndices) { + return pOut; + } + + var attrs=txt.getTextAttributeIndices(); + var lastOff=pOut.length; + //This whole thing is broken - becuase the length of pOut keeps changing... + for (var i=attrs.length-1; i>=0; i--) { + var off=attrs[i]; + var url=txt.getLinkUrl(off); + var font=txt.getFontFamily(off); + var fontColour=txt.getForegroundColor(off); + + if (url) { // start of link + if (i>=1 && attrs[i-1]==off-1 && txt.getLinkUrl(attrs[i-1])===url) { + // detect links that are in multiple pieces because of errors on formatting: + i-=1; + off=attrs[i]; + url=txt.getLinkUrl(off); + } + //pOut=pOut.substring(0, off)+'['+pOut.substring(off, lastOff)+']('+url+')'+pOut.substring(lastOff); + pOut=pOut.substring(0, off)+'<a href="'+url+'">'+pOut.substring(off, lastOff)+'</a>'+pOut.substring(lastOff); + } else if (font) { + if (!inSrc && font===font.COURIER_NEW) { + while (i>=1 && txt.getFontFamily(attrs[i-1]) && txt.getFontFamily(attrs[i-1])===font.COURIER_NEW) { + // detect fonts that are in multiple pieces because of errors on formatting: + i-=1; + off=attrs[i]; + } + pOut=pOut.substring(0, off)+'<ComputerCode>'+escapeHTML(pOut.substring(off, lastOff))+'</ComputerCode>'+pOut.substring(lastOff); + + //The bold and italic stuff is borked; + } + } + + if (fontColour) pOut=pOut.substring(0, off)+'[AuthorComment]'+pOut.substring(off, lastOff)+'[/AuthorComment]'+pOut.substring(lastOff); + + if (txt.isBold(off)) { + var d1 = "<b>"; + var d2 = "</b>"; + if (txt.isItalic(off)) { + // edbacher: changed this to handle bold italic properly. 
+ d1 = "<b><i>"; d2 = "</i></b>"; + } + pOut=pOut.substring(0, off)+d1+pOut.substring(off, lastOff)+d2+pOut.substring(lastOff); + } else if (txt.isItalic(off)) { + pOut=pOut.substring(0, off)+'<i>'+pOut.substring(off, lastOff)+'</i>'+pOut.substring(lastOff); + } + lastOff=off; + } + + + //HORRIBLE HACK + //The above is broken, occasionally injecting tags into anchor tag, because the string lengths keep changing + //Workaround for now... + var urlPatch = /<[ib]>(\[AuthorComment\])?(<a [^<]*)<\/[ib]>([^>]*">)/; //<[ib]>?(<a [^<]*)<\/[ib]>([^>]*">); // /<[ib]>(<a [^<]*)<\/[ib]>([^"]*">)/; + pOut = pOut.replace(urlPatch, "$1$2$3"); + urlPatch=/(<a [^\[]*)\[\/AuthorComment\]([^>]*">)/; + pOut=pOut.replace(urlPatch, "$1$2"); + + return escapeAmp(pOut); +} \ No newline at end of file From 95ce6bfdb6e87c0448ea4c51ebd13f3aa7930e9e Mon Sep 17 00:00:00 2001 From: Tony Hirst <tony.hirst@gmail.com> Date: Wed, 3 Dec 2014 18:10:38 +0000 Subject: [PATCH 5/9] Patch handling of emphasis tags --- converttoOUXML.gapps | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/converttoOUXML.gapps b/converttoOUXML.gapps index b0ca58c..932f7c7 100644 --- a/converttoOUXML.gapps +++ b/converttoOUXML.gapps @@ -207,7 +207,7 @@ function ConvertToOUXML() { //Hack fixes - bleurghh text=text.replace(/<Session>\s*<\/Session>/g,'').replace(/<Section>\s*<\/Section>/g,'').replace(/<SubSection>\s*<\/SubSection>/g,''); - + text=text.replace(/<Paragraph>\s*<\/Paragraph>/,'').replace('</i><i>','').replace('</b><b>',''); //replace ampersand //text = text.replace(/&/g,"&").replace(/&/g,'&'); @@ -381,7 +381,7 @@ function processParagraph(index, element, inSrc, imageCounter, listCounters) { //result.text = prefix+pOut+prefix.replace('<','</'); while (prefix.length){ var tag=prefix.pop() - pOut= tag +pOut + tag.replace('<','</'); + if ((pOut) && pOut!=" ") pOut= tag +pOut + tag.replace('<','</'); } result.text=pOut } @@ -444,16 +444,15 @@ function 
processTextElement(inSrc, txt) { if (! txt.getTextAttributeIndices) { return pOut; } - + Logger.log(pOut) var attrs=txt.getTextAttributeIndices(); var lastOff=pOut.length; - //This whole thing is broken - becuase the length of pOut keeps changing... + for (var i=attrs.length-1; i>=0; i--) { var off=attrs[i]; var url=txt.getLinkUrl(off); var font=txt.getFontFamily(off); - var fontColour=txt.getForegroundColor(off); - + Logger.log('i %s, off %s, url %s,font %s',i,off,url,font) if (url) { // start of link if (i>=1 && attrs[i-1]==off-1 && txt.getLinkUrl(attrs[i-1])===url) { // detect links that are in multiple pieces because of errors on formatting: @@ -463,6 +462,8 @@ function processTextElement(inSrc, txt) { } //pOut=pOut.substring(0, off)+'['+pOut.substring(off, lastOff)+']('+url+')'+pOut.substring(lastOff); pOut=pOut.substring(0, off)+'<a href="'+url+'">'+pOut.substring(off, lastOff)+'</a>'+pOut.substring(lastOff); + Logger.log('wurl1 %s',pOut) + lastOff+='<a href=""></a>'.length+url.length; } else if (font) { if (!inSrc && font===font.COURIER_NEW) { while (i>=1 && txt.getFontFamily(attrs[i-1]) && txt.getFontFamily(attrs[i-1])===font.COURIER_NEW) { @@ -472,12 +473,10 @@ function processTextElement(inSrc, txt) { } pOut=pOut.substring(0, off)+'<ComputerCode>'+escapeHTML(pOut.substring(off, lastOff))+'</ComputerCode>'+pOut.substring(lastOff); - //The bold and italic stuff is borked; } + } - if (fontColour) pOut=pOut.substring(0, off)+'[AuthorComment]'+pOut.substring(off, lastOff)+'[/AuthorComment]'+pOut.substring(lastOff); - if (txt.isBold(off)) { var d1 = "<b>"; var d2 = "</b>"; @@ -486,9 +485,17 @@ function processTextElement(inSrc, txt) { d1 = "<b><i>"; d2 = "</i></b>"; } pOut=pOut.substring(0, off)+d1+pOut.substring(off, lastOff)+d2+pOut.substring(lastOff); + lastOff+="<b><i></i></b>".length; } else if (txt.isItalic(off)) { pOut=pOut.substring(0, off)+'<i>'+pOut.substring(off, lastOff)+'</i>'+pOut.substring(lastOff); + lastOff+="<i></i>".length; } + 
Logger.log('wurl3 %s',pOut) + //This still treats headers as AuthorComment - trap on a better colour? Only certain colours? + if (txt.getForegroundColor(off) && (!txt.getLinkUrl(off)) ) { + pOut=pOut.substring(0, off)+'<AuthorComment>'+pOut.substring(off, lastOff)+'</AuthorComment>'+pOut.substring(lastOff); + } + Logger.log('wurl3 %s',pOut) lastOff=off; } @@ -496,10 +503,10 @@ function processTextElement(inSrc, txt) { //HORRIBLE HACK //The above is broken, occasionally injecting tags into anchor tag, because the string lengths keep changing //Workaround for now... - var urlPatch = /<[ib]>(\[AuthorComment\])?(<a [^<]*)<\/[ib]>([^>]*">)/; //<[ib]>?(<a [^<]*)<\/[ib]>([^>]*">); // /<[ib]>(<a [^<]*)<\/[ib]>([^"]*">)/; - pOut = pOut.replace(urlPatch, "$1$2$3"); - urlPatch=/(<a [^\[]*)\[\/AuthorComment\]([^>]*">)/; - pOut=pOut.replace(urlPatch, "$1$2"); - + //var urlPatch = /<[ib]>(\[AuthorComment\])?(<a [^<]*)<\/[ib]>([^>]*">)/; //<[ib]>?(<a [^<]*)<\/[ib]>([^>]*">); // /<[ib]>(<a [^<]*)<\/[ib]>([^"]*">)/; + //pOut = pOut.replace(urlPatch, "$1$2$3"); + //urlPatch=/(<a [^\[]*)\[\/AuthorComment\]([^>]*">)/; + //pOut=pOut.replace(urlPatch, "$1$2"); + Logger.log(pOut); return escapeAmp(pOut); } \ No newline at end of file From d18f61bb2972679a309223a18b9a6b4a03a6c849 Mon Sep 17 00:00:00 2001 From: Tony Hirst <tony.hirst@gmail.com> Date: Mon, 15 Dec 2014 14:02:45 +0000 Subject: [PATCH 6/9] Start to think about metadata --- converttoOUXML.gapps | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/converttoOUXML.gapps b/converttoOUXML.gapps index 932f7c7..b6235d4 100644 --- a/converttoOUXML.gapps +++ b/converttoOUXML.gapps @@ -39,12 +39,19 @@ Usage: -- A new folder with same name as original doc will be created containing the files */ -HEADER='<Item id="" TextType="CompleteItem" SchemaVersion="2.0" PageStartNumber="1" Template="Generic_A4_Unnumbered" Module="default" DiscussionAlias="Discussion" SessionAlias="" SecondColour="None" 
ThirdColour="None" FourthColour="None" Logo="colour" Rendering="">\n' -FOOTER='\n</Item>' +//User vars - pick these up eg from parent folder name? +var COURSECODE='TM351-WS'; +var ITEMID='X_exportPreview'; +//----- function ConvertToOUXML() { + var HEADER='<Item id="'+ITEMID+'" TextType="CompleteItem" SchemaVersion="2.0" PageStartNumber="1" Template="Generic_A4_Unnumbered" Module="default" DiscussionAlias="Discussion" SessionAlias="" SecondColour="None" ThirdColour="None" FourthColour="None" Logo="colour" Rendering="">\n'; + var FOOTER='\n</Item>'; + var numChildren = DocumentApp.getActiveDocument().getActiveSection().getNumChildren(); - var text = HEADER + '<CourseCode/>\n<CourseTitle/>\n<ItemID/>\n<ItemTitle/>\n\n'; + var courseCode=; + var courseTitle=COURSECODE; + var text = HEADER + '<CourseCode>'+courseCode+'</CourseCode>\n<CourseTitle>'+CourseTitle+'</CourseTitle>\n<ItemID/>\n<ItemTitle/>\n\n'; text+="<Unit>\n<UnitID/>\n<UnitTitle>"+DocumentApp.getActiveDocument().getName()+"</UnitTitle>\n<ByLine/>\n" var inSrc = false; var inClass = false; @@ -106,6 +113,11 @@ function ConvertToOUXML() { } } if (child.getType()===DocumentApp.ElementType.LIST_ITEM) { + //Refactor note: as we hit a list element for the first time, + // perhaps we should go into a while loop, incrementing i + //until such a time as we aren't still in a LIST_ITEM? + //This would allow us to more easily handle the outer list tags? 
+ inlist=true; var listItem = child; var nesting = listItem.getNestingLevel() From 6c6c21f0a259a4b3769d9dd2efdc95a4b8ca310e Mon Sep 17 00:00:00 2001 From: Tony Hirst <tony.hirst@gmail.com> Date: Mon, 15 Dec 2014 14:06:00 +0000 Subject: [PATCH 7/9] Patch image export --- converttoOUXML.gapps | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/converttoOUXML.gapps b/converttoOUXML.gapps index b6235d4..979be6f 100644 --- a/converttoOUXML.gapps +++ b/converttoOUXML.gapps @@ -246,8 +246,9 @@ function saveOUXML(){ name=attachments[i]['fileName'] content=attachments[i]['content'] mimeType=attachments[i]['mimeType'] - //f=DriveApp.createFile(name, content, mimeType) - fid.createFile(name, content, mimeType) + //Inefficient and going round the houses - we converted out of blob originally... + var blob= Utilities.newBlob(content, mimeType, name) + fid.createFile(blob) } } From 311320d02993873d964f11d283e9f1faefb5572f Mon Sep 17 00:00:00 2001 From: Tony Hirst <tony.hirst@gmail.com> Date: Wed, 28 Jan 2015 13:09:42 +0000 Subject: [PATCH 8/9] Some hacky bug fixes. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Really need to use one of the forks of the original script that parses the Google doc in a much cleaner, recursive way, and that can address schema validation and character escaping rather more sensibly… --- converttoOUXML.gapps | 51 +++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/converttoOUXML.gapps b/converttoOUXML.gapps index 979be6f..84b2fcf 100644 --- a/converttoOUXML.gapps +++ b/converttoOUXML.gapps @@ -8,7 +8,8 @@ function onOpen(e) { DocumentApp.getUi().createAddonMenu() .addItem('Email OUXML', 'emailOUXML') - .addItem('Save OUXML', 'saveOUXML') + .addItem('Save OUXML to GDrive', 'saveOUXML') + .addItem('Save OUXML to GDrive (zip)', 'saveOUXMLzip') .addToUi(); } @@ -49,9 +50,9 @@ function ConvertToOUXML() { var FOOTER='\n</Item>'; var numChildren = DocumentApp.getActiveDocument().getActiveSection().getNumChildren(); - var courseCode=; + var courseCode=COURSECODE; var courseTitle=COURSECODE; - var text = HEADER + '<CourseCode>'+courseCode+'</CourseCode>\n<CourseTitle>'+CourseTitle+'</CourseTitle>\n<ItemID/>\n<ItemTitle/>\n\n'; + var text = HEADER + '<CourseCode>'+courseCode+'</CourseCode>\n<CourseTitle>'+courseTitle+'</CourseTitle>\n<ItemID/>\n<ItemTitle/>\n\n'; text+="<Unit>\n<UnitID/>\n<UnitTitle>"+DocumentApp.getActiveDocument().getName()+"</UnitTitle>\n<ByLine/>\n" var inSrc = false; var inClass = false; @@ -219,18 +220,23 @@ function ConvertToOUXML() { //Hack fixes - bleurghh text=text.replace(/<Session>\s*<\/Session>/g,'').replace(/<Section>\s*<\/Section>/g,'').replace(/<SubSection>\s*<\/SubSection>/g,''); + //?? More general to replace following with routine to add <Title> if or
tag + // is followed by a tag that isn't ?? + text=text.replace(/<Session>\s*<Section>/g,'<Session><Title>
'); + text=text.replace(/
\s*/g,'
'); + text=text.replace(/
\s*/g,'
'); text=text.replace(/\s*<\/Paragraph>/,'').replace('','').replace('',''); //replace ampersand //text = text.replace(/&/g,"&").replace(/&/g,'&'); - attachments.push({"fileName":DocumentApp.getActiveDocument().getName()+".md", "mimeType": "text/plain", "content": text}); + attachments.push({"fileName":DocumentApp.getActiveDocument().getName()+".xml", "mimeType": "text/xml", "content": text}); return attachments } function emailOUXML(){ - attachments=ConvertToOUXML() + var attachments=ConvertToOUXML() MailApp.sendEmail(Session.getActiveUser().getEmail(), "[MARKDOWN_MAKER] "+DocumentApp.getActiveDocument().getName(), "Your converted markdown document is attached (converted from "+DocumentApp.getActiveDocument().getUrl()+")"+ @@ -239,19 +245,35 @@ function emailOUXML(){ } function saveOUXML(){ - attachments=ConvertToOUXML() - folder=DocumentApp.getActiveDocument().getName() - fid=DriveApp.createFolder(folder) + var attachments=ConvertToOUXML() + var folder=DocumentApp.getActiveDocument().getName() + var fid=DriveApp.createFolder(folder) for (var i=0; i/g,'♦') Logger.log(pOut) var attrs=txt.getTextAttributeIndices(); var lastOff=pOut.length; @@ -521,5 +546,5 @@ function processTextElement(inSrc, txt) { //urlPatch=/(]*">)/; //pOut=pOut.replace(urlPatch, "$1$2"); Logger.log(pOut); - return escapeAmp(pOut); + return escapeAmp(pOut).replace(/♣/g,'<').replace(/♦/g,'>'); } \ No newline at end of file From 4eded7ae8f228f7ed402b10942e3e7700a9ee9f2 Mon Sep 17 00:00:00 2001 From: Tony Hirst Date: Wed, 28 Jan 2015 18:44:01 +0000 Subject: [PATCH 9/9] Add support for metadata, update README --- README.md | 20 ++++++++++-------- converttoOUXML.gapps | 34 +++++++++++++++++++++++++++--- metadata.html | 50 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 12 deletions(-) create mode 100644 metadata.html diff --git a/README.md b/README.md index fde038f..8eb8b4c 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,26 @@ gdocs2md ======== -A simple Google Apps 
script to convert a properly formatted Google Drive Document to the markdown (.md) format. +A derivative of a simple Google Apps script forked originally from https://github.com/mangini/gdocs2md to convert an appropriately formatted Google Drive Document to OU-XML. ## Usage * Adding this script to your doc: * Tools > Script Editor > New * Select "Blank Project" - * Clear the example code and paste in the contents of [converttomarkdown.gapps](https://raw.github.com/psychemedia/gdocs2md/master/converttomarkdown.gapps) + * Clear the example code and paste in the contents of [converttoOUXML.gapps](https://raw.github.com/psychemedia/gdocs2md/master/converttoOUXML.gapps) + * Create a new HTML file called *metadata* and copy and paste in the contents of *metadata.html* * Save with an appropriate project name, eg *md exporter* * Reload the original doc * Running the script (first run of each function will require you to authorize it): - * In the Add-Ons menu, find the new menu eg "md exporter" - * Select "Email md" to email markdown doc and saved images to yourself - * Converted doc with images attached will be mailed to you. Subject will be "[MARKDOWN_MAKER]..." - * Select "Save md" to save markdown doc and images to your Google Drive - * A new folder with same name as original doc will be created containing the markdown and image files + * In the Add-Ons menu, find the new menu eg "gdoc2OUXML" + * Select "Metadata" to open a form that allows you to set various metadata properties that are either required or expected in the OUXML + * Select "Email OUXML" to email markdown doc and saved images to yourself + * Converted doc with images attached will be mailed to you. Subject will be "[OUXML_MAKER]..." 
+ * Select "Save OUXML to GDrive" to save markdown doc and images to your Google Drive + * A new folder with same name as original doc will be created containing the OUXML document and image files + * Select "Save OUXML to GDrive (zip)" to save a zip file containing the XML and any images in one bundle. The zip file will be saved to the top level of your Google Drive account. @@ -57,8 +60,7 @@ Use this script at your will, on any document you want and for any purpose, comm The MarkDown files generated by this script are not considered derivative work and don't require any attribution to the owners of this script. -If you want to modify and redistribute the script (not the converted documents - those are yours), -just keep a reference to this repo or to the license info below: +If you want to modify and redistribute the script (not the converted documents - those are yours), just keep a reference to this repo or to the license info below: ``` Original content: Copyright 2013 Google Inc. All Rights Reserved. 
diff --git a/converttoOUXML.gapps b/converttoOUXML.gapps index 84b2fcf..36a069b 100644 --- a/converttoOUXML.gapps +++ b/converttoOUXML.gapps @@ -7,6 +7,7 @@ */ function onOpen(e) { DocumentApp.getUi().createAddonMenu() + .addItem('Metadata','metadataView') .addItem('Email OUXML', 'emailOUXML') .addItem('Save OUXML to GDrive', 'saveOUXML') .addItem('Save OUXML to GDrive (zip)', 'saveOUXMLzip') @@ -40,13 +41,40 @@ Usage: -- A new folder with same name as original doc will be created containing the files */ + +function metadataView() { + html= HtmlService + .createTemplateFromFile('metadata') + .evaluate() + .setSandboxMode(HtmlService.SandboxMode.IFRAME); + DocumentApp.getUi().showModalDialog(html, 'Metadata'); +} + + +function processMetadataForm(theForm) { + var props=PropertiesService.getDocumentProperties() + // + for (var item in theForm) { + props.setProperty(item,theForm[item]) + Logger.log(item+"::"+theForm[item]); + } +} + +function getProp(key) { + var props= PropertiesService.getDocumentProperties() + return props.getProperty(key) ? props.getProperty(key) : ''; +} + +//------ //User vars - pick these up eg from parent folder name? -var COURSECODE='TM351-WS'; + +var COURSECODE= getProp('courseCode'); var ITEMID='X_exportPreview'; +var RENDERING=getProp('rendering') //----- function ConvertToOUXML() { - var HEADER='\n'; + var HEADER='\n'; var FOOTER='\n'; var numChildren = DocumentApp.getActiveDocument().getActiveSection().getNumChildren(); @@ -238,7 +266,7 @@ function ConvertToOUXML() { function emailOUXML(){ var attachments=ConvertToOUXML() MailApp.sendEmail(Session.getActiveUser().getEmail(), - "[MARKDOWN_MAKER] "+DocumentApp.getActiveDocument().getName(), + "[OUXML_MAKER] "+DocumentApp.getActiveDocument().getName(), "Your converted markdown document is attached (converted from "+DocumentApp.getActiveDocument().getUrl()+")"+ "\n\nDon't know how to use the format options? 
See http://github.com/mangini/gdocs2md\n", { "attachments": attachments }); diff --git a/metadata.html b/metadata.html new file mode 100644 index 0000000..b1b6360 --- /dev/null +++ b/metadata.html @@ -0,0 +1,50 @@ + + + + + + + + + + +
+ + +
: + 2) val= metadataItems[i][2] ?> + value= "" + /> +
+ + +
+ +
+ + + +
\ No newline at end of file