Text Parsing, Email Parsing and Artificial Intelligence through C# - Regex Expertise with Zoho Recruit, with Full Source Code

Hi,

Many of us need an email parser that can read messages from Gmail (or any other POP3 provider) and insert the data they contain into a database.

I recently built a small ASP.NET application that extracts job-posting data from Gmail messages and inserts it into Zoho Recruit. Here is the code:


using System;
using System.Collections.Generic;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using OpenPop.Pop3;
using OpenPop.Mime;
using System.Data;
using System.Net;
using System.IO;
using System.Text;
using System.Collections;
using Deveel.Web.Zoho;
using System.Text.RegularExpressions;
using Message = OpenPop.Mime.Message;

public partial class CS : System.Web.UI.Page
{
    private Dictionary<int, Message> messages;
    protected void Read_Emails(object sender, EventArgs e)
    {
        string body;
        int success = 0;

        messages = new Dictionary<int, Message>();
        Pop3Client pop3Client = new Pop3Client();
        //string title = "";
        try
        {
                txtMailServer.Text = "pop.gmail.com";
                txtPort.Text = "995";
                chkSSL.Checked = true;              
                pop3Client.Connect(txtMailServer.Text, int.Parse(txtPort.Text), chkSSL.Checked);            
                pop3Client.Authenticate(txtUserName.Text, txtPassword.Text);
                Session["Pop3Client"] = pop3Client;
                DataTable dtMessages = new DataTable();
                dtMessages.Columns.Add("Title");
                dtMessages.Columns.Add("Client");
                dtMessages.Columns.Add("Description");
                dtMessages.Columns.Add("Body");
                dtMessages.Columns.Add("Status");
             
            int count = pop3Client.GetMessageCount();
            bool flag = true;
            for (int i = count; i >= 1; i--)            
            {
                if (Session["Pop3Client"]==null)
                    Session["Pop3Client"] = pop3Client;
                Message message = pop3Client.GetMessage(i);
                MessagePart plainTextPart = message.FindFirstPlainTextVersion();
                 if (plainTextPart != null)
                {
                    // The message had a text/plain version - show that one
                    body = plainTextPart.GetBodyAsText();
                }
                else
                {
                    // Try to find a body to show in some of the other text versions
                    List<MessagePart> textVersions = message.FindAllTextVersions();
                    if (textVersions.Count >= 1)
                        body = textVersions[0].GetBodyAsText();
                    else
                        body = "<<OpenPop>> Cannot find a text version body in this message to show <<OpenPop>>";
                }

                List<MessagePart> attachments = message.FindAllAttachments();
                foreach (MessagePart attachment in attachments)
                { }
                if (body.Contains("eMail client must be configured in order to see this HTML format"))
                    flag = false;
                else
                    flag = true;
                // It is to remove empty lines from the body
                string originalBody = body;
                body = Regex.Replace(body, @"^\s+$[\r\n]*", "", RegexOptions.Multiline);              
                string title = "Un Titled";
                string client = "";
                string assigned_recruiter = "";
                string location = "";
                string duration = "";
                string clientManager = "";
                string NumberOfpositions="1";
                string JobDescription = "";
                string JobType = "Contract";
                string InterviewType = "To be determined";

                Match match= Regex.Match(body, "title\\s*([^\r\n]*)",
        RegexOptions.IgnoreCase);
                if (match.Success)
                    title = match.Groups[0].Value.Substring(6);
                else
                {
                    match = Regex.Match(body, "Position\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        title = match.Groups[0].Value.Substring(9);
                    else
                    {
                        match = Regex.Match(body, "Role\\s*([^\r\n]*)",
                RegexOptions.IgnoreCase);
                        if (match.Success)
                            title = match.Groups[0].Value.Substring(6);
                    }
                }

                if (title=="")
                    title = "Un Titled";

                if (flag)
                {
                    match = Regex.Match(body, "client\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        client = match.Groups[0].Value.Substring(7);

                    match = Regex.Match(body, "location\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        location = match.Groups[0].Value.Substring(9);


                    match = Regex.Match(body, "duration:\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        duration = match.Groups[0].Value.Substring(9);


                    match = Regex.Match(body, "phone:\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        InterviewType = "Phone Interview Only";

                    match = Regex.Match(body, "Phone and In-person:\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        InterviewType = "Phone then in-person";
                 
                    match = Regex.Match(body, "Client manager:\\s*([^\r\n]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        clientManager = match.Groups[0].Value.Substring(9);

                    match = Regex.Match(body, "Positions:\\s*([^\r\n]*)",
           RegexOptions.IgnoreCase);
                    if (match.Success)
                        clientManager = match.Groups[0].Value.Substring(9);

                    match = Regex.Match(body, "Temporary^*([^\\s]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        JobType = "Contract";


                    match = Regex.Match(body, "Contract to Hire^*([^\\s]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        JobType = "Contract to Hire";


                    match = Regex.Match(body, "Temporary to Permanent^*([^\\s]*)",
            RegexOptions.IgnoreCase);
                    if (match.Success)
                        JobType = "Temporary to Permanent";                  

                    body = fnRemoveSplChars(body);
                    originalBody = fnRemoveSplChars(originalBody);
                    JobDescription = matchDescription(originalBody);
                    string[] host= message.Headers.From.MailAddress.Host.Split('.');                
                    Hashtable recordInfo = new Hashtable();
                    recordInfo.Add("Posting title", fnRemoveSplChars(title));
                    recordInfo.Add("Client", host[0]);
                    recordInfo.Add("Assigned recruiter", assigned_recruiter);
                    recordInfo.Add("Location", fnRemoveSplChars(location));
                    recordInfo.Add("Project Length", fnRemoveSplChars(duration));
                    recordInfo.Add("Client manager", clientManager);
                    recordInfo.Add("Job opening status", "pending");
                    recordInfo.Add("Number of positions", fnRemoveSplChars(NumberOfpositions));
                    recordInfo.Add("Country", "USA");
                    recordInfo.Add("Client Contact", message.Headers.From.MailAddress.Address);
                    recordInfo.Add("Job Description", JobDescription);
                    recordInfo.Add("Job type", JobType);
                    recordInfo.Add("Posted on", message.Headers.Date);
                    recordInfo.Add("Interview Type", InterviewType);
                    recordInfo.Add("Email", body);
                    recordInfo.Add("Subject", fnRemoveSplChars(message.Headers.Subject));
                 
                    string result = addRecord(recordInfo);
                    dtMessages.Rows.Add();
                    dtMessages.Rows[dtMessages.Rows.Count - 1]["Title"] = title;
                    dtMessages.Rows[dtMessages.Rows.Count - 1]["Client"] = host[0];
                    dtMessages.Rows[dtMessages.Rows.Count - 1]["Description"] = JobDescription;
                    dtMessages.Rows[dtMessages.Rows.Count - 1]["Body"] = originalBody;
                    dtMessages.Rows[dtMessages.Rows.Count - 1]["Status"] = result;
                 
                    if (result.Contains("Successfully Added"))
                    messages.Add(i, message);                  
                    success++;
                }                              
            }

           foreach (int i in messages.Keys)
             pop3Client.DeleteMessage(i);          
            lblMessage.Text = "A total of " + success.ToString() + " Emails have been processed.";
            gvEmails.DataSource = dtMessages;
            gvEmails.DataBind();
         
        }
        catch(Exception exp) {
            lblMessage.Text = exp.Message;
        }
     
        pop3Client.Disconnect();
        pop3Client.Dispose();
    }
    public static string fnRemoveSplChars(string strMyString)
    {
        StringBuilder sb = new StringBuilder();
        foreach (char c in strMyString)
        {
            if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
                (c == '\n') || (c == '\r') || (c == ' ') || (c == '.') ||
                (c == ',') || (c == ';') || (c == '"') || (c == '@') ||
                (c == '=') || (c == '_') || (c == '-'))
            {
                sb.Append(c);
            }
        }
        return sb.ToString();
    }
    public string matchDescription(string body)
    {
        string description = "";
        bool flag = false;
        using (var reader = new StringReader(body))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (line.ToLower().Contains("thanks") || line.ToLower().Contains("regards") || line.ToLower().Contains("about collabera"))
                    flag = false;

                if (flag)
                    description = description + "\n" + line;
                if (line.ToLower().Contains("skills") || line.ToLower().Contains("description") || line.ToLower().Contains("job summary"))
                    flag = true;
            }
        }
        return description;
    }
/// <summary>
/// Posts one job opening to the Zoho Recruit <c>JobOpenings/addRecords</c> endpoint.
/// Each entry of <paramref name="ht"/> becomes one <c>&lt;FL val="key"&gt;value&lt;/FL&gt;</c>
/// element of the request XML.
/// </summary>
/// <param name="ht">Field name to field value map for the record.</param>
/// <returns>
/// "Successfully Added" when the response text contains "success" (case-insensitive),
/// otherwise "Error in addition".
/// </returns>
public string addRecord(Hashtable ht)
{
    string res;
    //string token = "be1dd7a5ffasdfasdfasdfasdfasdzxcx7bb"; // token for test account

    //URL to get auth token is: https://accounts.zoho.com/apiauthtoken/nb/create?SCOPE=

    // NOTE(review): `token` is not declared anywhere in this listing. It is presumably a
    // member holding the value returned by getToken() ("false" meaning no token) - confirm
    // where it is defined.
    if (token != "false")
    {
        StringBuilder xmlStr = new StringBuilder();
        xmlStr.Append("<JobOpenings>");
        xmlStr.Append("<row no=\"1\">");
        foreach (DictionaryEntry field in ht)
        {
            // Escape names and values so characters such as & < > " cannot break the XML,
            // and keep the value free of the extra line break AppendLine used to insert.
            xmlStr.Append("<FL val=\"")
                  .Append(System.Security.SecurityElement.Escape(Convert.ToString(field.Key)))
                  .Append("\">")
                  .Append(System.Security.SecurityElement.Escape(Convert.ToString(field.Value)))
                  .AppendLine("</FL>");
        }
        xmlStr.AppendLine("</row></JobOpenings>");

        // The body is sent as application/x-www-form-urlencoded, so the XML must be
        // URL-encoded; otherwise '&', '+', '=' or '%' inside a value corrupts the payload.
        string param = "authtoken=" + token +
            "&scope=recruitapi&xmlData=" + HttpUtility.UrlEncode(xmlStr.ToString());
        string apiUrl = "https://recruit.zoho.com/ats/private/xml/JobOpenings/addRecords";
        res = getResponseFromUrl(apiUrl, param);
    }
    else
    {
        res = "Token Invalid";
    }

    // Ordinal comparison: the result must not depend on the server's culture.
    if (res.IndexOf("success", StringComparison.OrdinalIgnoreCase) >= 0)
        return "Successfully Added";
    else
        return "Error in addition";
}

    public string getToken(string user , string password)
    {
        string iamtokenid = "false";
        try
        {
            string url = "https://accounts.zoho.com/apiauthtoken/create?SCOPE=zohopeople/recruitapi";
            string param = "EMAIL_ID=" + user + "&PASSWORD=" + password;
            string res = getResponseFromUrl(url, param);
            int toindex = res.IndexOf("RESULT=");
            int fromindex = res.IndexOf("AUTHTOKEN=");
            int length = toindex - fromindex - 11;
            string tokenid = res.Substring(fromindex + 10, length);
            string result1 = res.Substring(toindex + 7, 5);
            if (result1 != "FALSE")
                iamtokenid = tokenid;
            else
                iamtokenid = "false";          
        }
        catch (Exception exp)
        {}
     
        return iamtokenid.Trim();
    }

    public string getResponseFromUrl(string url, string param)
    {
        string str = "";
        try
        {
            HttpWebRequest webreq = (HttpWebRequest)WebRequest.Create(url);
            webreq.Method = "POST";
            webreq.ContentType = "application/x-www-form-urlencoded";
            Byte[] byteArray = System.Text.Encoding.UTF8.GetBytes(param);
            System.IO.Stream dataStream = webreq.GetRequestStream();
            dataStream.Write(byteArray, 0, byteArray.Length);
            dataStream.Close();

            WebResponse res = webreq.GetResponse();
            System.IO.Stream stream = res.GetResponseStream();
            StreamReader streamReader = new System.IO.StreamReader(stream);
            str = streamReader.ReadToEnd();
        }
        catch (Exception ex)
        { }

        return str;
    }

}



In adventure games, a text parser takes typed input (a command) from the player and simplifies it to something the game can understand. Usually, words with the same meaning are turned into the same word (e.g. "take" and "get") and certain filler words are dropped (e.g. articles, or the "at" in "look at rock").
The parser makes it easier for the game's author to react to input. The author does not have to write special code to process the commands "get the gem", "take the gem", "get gem", "take gem", "take the precious gem", etc. separately, as the parser will have stripped the input down to something like "take gem".
For the player, the game is more flexible, as the game has a larger vocabulary, and there are fewer guess-the-verb and guess-the-noun problems.
Parsers are used in early interactive fiction games like the Zork series, and more recently in games created by systems like Inform and TADS.


In Python's email package, Message object structures can be created in one of two ways: they can be created from whole cloth by instantiating Message objects and stringing them together via attach() and set_payload() calls, or they can be created by parsing a flat text representation of the email message.
The email package provides a standard parser that understands most email document structures, including MIME documents. You can pass the parser a string or a file object, and the parser will return to you the root Message instance of the object structure. For simple, non-MIME messages the payload of this root object will likely be a string containing the text of the message. For MIME messages, the root object will return True from its is_multipart() method, and the subparts can be accessed via the get_payload() and walk() methods.
There are actually two parser interfaces available for use, the classic Parser API and the incremental FeedParser API. The classic Parser API is fine if you have the entire text of the message in memory as a string, or if the entire message lives in a file on the file system. FeedParser is more appropriate for when you’re reading the message from a stream which might block waiting for more input (e.g. reading an email message from a socket). The FeedParser can consume and parse the message incrementally, and only returns the root object when you close the parser [1].
Note that the parser can be extended in limited ways, and of course you can implement your own parser completely from scratch. There is no magical connection between the email package’s bundled parser and the Message class, so your custom parser can create message object trees any way it finds necessary.

Post a Comment

0 Comments