From ce10d5a0d3a97b27414c55da51b3aab20fc02eb3 Mon Sep 17 00:00:00 2001 From: Ricardo Henrique Gracini Guiraldelli Date: Sun, 30 Aug 2015 00:26:28 +0200 Subject: [PATCH] Date processing and clean up of source code The functions that process the date are implemented and some clean up of the code was made. --- date_processor.js | 21 +++++++++++++++++++++ email_connector.js | 10 +++++----- email_processor.js | 34 +++++++++++++++++++++++----------- utils.js | 8 ++++++++ 4 files changed, 57 insertions(+), 16 deletions(-) create mode 100644 date_processor.js create mode 100644 utils.js diff --git a/date_processor.js b/date_processor.js new file mode 100644 index 0000000..1c1635b --- /dev/null +++ b/date_processor.js @@ -0,0 +1,21 @@ +// regex pattern for finding the dates; the "/" separators are escaped so they do not terminate the regex literal +var REGEX_DATE = /((\d{1,2}\/\d{1,2}\/(\d{4}|\d{2}))|(\d{4}-\d{2}-\d{2})|(\d{1,2}(st|nd|rd|th)*\s*(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)\s*(\d{4}|\d{2}))|((Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)(,)?\s*\d{1,2}(st|nd|rd|th)*\s*(\d{4}|\d{2}))|((Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)\s*\d{1,2}(st|nd|rd|th)*\s*(,)?\s*(\d{4}|\d{2})))/i; +// ISO format for dates +var DATE_ISO_FORMAT = "yyyy-MM-dd"; + +// gets the matched date text and returns it as a string in the ISO format, +// using the Datejs library +function get_iso_date(matched_date){ + return Date.parse(matched_date).toString(DATE_ISO_FORMAT); +} + +// returns the String.match result for the first date found in the line (the date text is element 0), or null if there is none +function get_literal_date(line){ + return line.match(REGEX_DATE); +} + +// verifies if a line contains a date +// returning a true value in positive case +function has_date(line){ + return REGEX_DATE.test(line); +} diff --git a/email_connector.js b/email_connector.js index 3863ab9..befff77 100644 
--- a/email_connector.js +++ b/email_connector.js @@ -28,12 +28,12 @@ function get_message_text(gmail_message){ return gmail_message.getPlainBody(); } +// returns the subject line of a GMail message, as given by its getSubject() method +function get_subject_text(gmail_message){ + return gmail_message.getSubject(); +} + // main function, which returns the plain text body of all unread GMail messages function get_body_all_unread_messages(){ return get_unread_messages(get_unread_threads()).map(get_message_text); } - -// debug function -function debug(){ - Logger.log("Unread messages:\n%s", get_body_all_unread_messages()); -} diff --git a/email_processor.js b/email_processor.js index ced37ab..f42dc4d 100644 --- a/email_processor.js +++ b/email_processor.js @@ -1,16 +1,28 @@ // regex patter for finding the "key line" that classifies the email as "call for papers" -var regex_key_line = /((call\s+for\s+(paper|papers))|submission|deadline)/i; //ignore case -// regex pattern for finding the dates -// var regex_date = /((\d{1,2}/\d{1,2}/(\d{4}|\d{2}))|(\d{4}-\d{2}-\d{2})|(\d{1,2}(st|nd|rd|th)*\s*(Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)\s*(\d{4}|\d{2}))|((Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)(,)?\s*\d{1,2}(st|nd|rd|th)*\s*(\d{4}|\d{2}))|((Jan|January|Feb|February|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|August|Sep|September|Oct|October|Nov|November|Dec|December)\s*\d{1,2}(st|nd|rd|th)*\s*(,)?\s*(\d{4}|\d{2})))/i; +var REGEX_KEY_LINE = /((call\s+for\s+(paper|papers))|submission|deadline)/i; //ignore case +// regex for the line which contains paper submission deadline information; both "paper(s)" groups are optional, so any line containing "submission" or "deadline" matches +var REGEX_PAPER_DEADLINE = /(paper(s)?)*(submission|deadline)(paper(s)?)*/i; -function find_key_line(line){ - return regex_key_line.test(line); +// verifies if a line contains the information of a call for paper email, +// returning a true value in 
positive case +function has_key_line(line){ + return REGEX_KEY_LINE.test(line); } -// returns -1 if no match is found -// otherwise, returns the index of the match -// see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/search -// or https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions?redirectlocale=en-US&redirectslug=JavaScript%2FGuide%2FRegular_Expressions#Working_with_regular_expressions -function find_date(line){ - return line.search(regex_date); +// verifies if the line contains the keywords for paper submission deadline date, +// returning a true value in positive case +function is_paper_deadline(line){ + return REGEX_PAPER_DEADLINE.test(line); +} + +// takes a GmailMessage object and processes it, extracting the ISO date of every body line that has a date and a paper deadline keyword +function process_email(gmail_message){ + var subject = get_subject_text(gmail_message); + var lines_body = break_lines(get_message_text(gmail_message)); + for (var i = 0; i < lines_body.length; i++){ + if (has_date(lines_body[i]) && is_paper_deadline(lines_body[i])){ + var iso_date = get_iso_date(get_literal_date(lines_body[i])[0]); + // TODO: create Google Calendar entry + } + } } diff --git a/utils.js b/utils.js new file mode 100644 index 0000000..0f927b3 --- /dev/null +++ b/utils.js @@ -0,0 +1,8 @@ +// modified from http://beckism.com/2010/09/splitting-lines-javascript/ +// modified from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp#Using_regular_expression_to_split_lines_with_different_line_endingsends_of_lineline_breaks +var LINE_BREAKS = /\r\n|\n|\r/; + +function break_lines(string){ + // LINE_BREAKS matches only the line terminators, so split() returns the text of each line + return string.split(LINE_BREAKS); +}