A script I wrote for a project to scrape agent information from Life Happens.

Scraping was done for different zip codes.

Used C# .NET and Selenium on the project.

 

/// <summary>
/// One agent entry scraped from the Life Happens agent locator.
/// All members are plain strings filled in by the scraper from the text of the result page.
/// </summary>
public class LifeAgentModel
{
    /// <summary>Full name exactly as shown in the result heading; excluded from JSON output via <c>[JsonIgnore]</c>.</summary>
    [JsonIgnore]
    public string name { get; set; }
    /// <summary>Zip code that was searched when this agent was found.</summary>
    public string zipCode { get; set; }
    /// <summary>Text of the <c>span</c> nested inside the name heading; empty string when the page has none for this position.</summary>
    public string detail { get; set; }
    /// <summary>Text of the <c>span</c> that follows the name inside the agent-info block.</summary>
    public string company { get; set; }
    /// <summary>Text of the agent's <c>tel</c> block.</summary>
    public string phone { get; set; }
    /// <summary>Text of the agent's link element; the scraper stores "..." when that text is shorter than two characters.</summary>
    public string address { get; set; }
    /// <summary>First space-separated token of <see cref="name"/>.</summary>
    public string firstname { get; set; }
    /// <summary>Last-name part derived from <see cref="name"/> by the scraper.</summary>
    public string lastname { get; set; }
    /// <summary>Middle-name part derived from <see cref="name"/>; left unset when the name has no middle part.</summary>
    public string middlename { get; set; }
}


/// <summary>
/// Convenience extensions for <see cref="IWebDriver"/>.
/// </summary>
public static class WebDriverExtensions
{
    /// <summary>
    /// Locates an element, optionally retrying through a <see cref="WebDriverWait"/>.
    /// </summary>
    /// <param name="driver">Driver to search with.</param>
    /// <param name="by">Locator describing the element.</param>
    /// <param name="timeoutInSeconds">
    /// Upper bound, in seconds, handed to <see cref="WebDriverWait"/>. Zero or a negative value
    /// skips the wait object and performs a single direct lookup.
    /// </param>
    /// <returns>The element produced by the lookup.</returns>
    public static IWebElement FindElement(this IWebDriver driver, By by, int timeoutInSeconds)
    {
        // Without a positive timeout there is nothing to wait for: do a plain lookup.
        if (timeoutInSeconds <= 0)
        {
            return driver.FindElement(by);
        }

        TimeSpan limit = TimeSpan.FromSeconds(timeoutInSeconds);
        var waiter = new WebDriverWait(driver, limit);

        return waiter.Until(current => current.FindElement(by));
    }
}

public class LifeHappensScraper
{
    /// <summary>
    /// Runs a zip-code search on the Life Happens agent locator and builds a
    /// <see cref="LifeAgentModel"/> for every listed agent whose name text is non-empty.
    /// </summary>
    /// <param name="driver">Browser session used to load and interact with the locator page.</param>
    /// <param name="zipCode">Zip code typed into the search form and stamped on every model.</param>
    static void ScrapeAndAddItems(ChromeDriver driver, string zipCode)
    {
        // NOTE(review): the models collected here are neither returned nor stored by the code
        // visible in this method - confirm where they are meant to end up.
        var lsResult = new List<LifeAgentModel>();
        driver.Navigate().GoToUrl("https://lifehappens.org/agent-locator/");

        // Fill in and submit the zip search form.
        var zipInput = driver.FindElement(By.XPath("//form[@class='zip']/input[@name='zip']"));
        zipInput.Click();
        zipInput.SendKeys(zipCode);

        var optionInput = driver.FindElement(By.XPath("//form[@class='zip']/div/input[@id='a']"));
        optionInput.Click();

        var submitInput = driver.FindElement(By.XPath("//form[@class='zip']/input[@name='SubmitZip']"));
        submitInput.Click();

        // This only configures how long later element lookups may block; it does not pause here.
        driver.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(3);

        var loadMoreLink = driver.FindElement(By.XPath("//a[@class='load-more']"));
        loadMoreLink.Click();

        var listResult = driver.FindElement(By.XPath("//div[@id='result-1']"), 1);

        // The expressions start with "//", so XPath evaluates them against the whole document
        // even though the search is issued through listResult.
        // The five lists are matched to each other purely by position.
        var names = listResult.FindElements(By.XPath("//dt/div[@class='agent-info']/h3[@class='name']"));
        var details = listResult.FindElements(By.XPath("//dt/div[@class='agent-info']/h3[@class='name']/span"));
        var companies = listResult.FindElements(By.XPath("//dt/div[@class='agent-info']/span"));
        var addresses = listResult.FindElements(By.XPath("//dt/a"));
        var phones = listResult.FindElements(By.XPath("//dt/div[@class='tel']"));

        // Click Load More once per group of ten names (rounded up) to handle client side pagination.
        var totalLoadMore = names.Count / 10;
        if (names.Count % 10 > 0)
        {
            totalLoadMore = totalLoadMore + 1;
        }

        for (var i = 0; i < totalLoadMore; i++)
        {
            loadMoreLink = driver.FindElement(By.XPath("//a[@class='load-more']"));
            loadMoreLink.Click();
            // Adjusts the lookup timeout for subsequent FindElement calls; it is not a sleep.
            driver.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(2);
        }

        for (int i = 0; i < names.Count; i++)
        {
            // Read the address text once; every .Text access is a round-trip to the browser.
            var addressText = GetTextOrEmpty(addresses, i);

            var model = new LifeAgentModel()
            {
                name = names[i].Text,
                detail = GetTextOrEmpty(details, i),
                company = GetTextOrEmpty(companies, i),
                address = addressText.Length < 2 ? "..." : addressText,
                phone = GetTextOrEmpty(phones, i),
                zipCode = zipCode
            };

            // Entries without any name text are skipped.
            if (model.name.Length == 0)
            {
                continue;
            }

            AssignNameParts(model);
            lsResult.Add(model);
        }
    }

    /// <summary>
    /// Returns the text of the element at <paramref name="index"/>, or an empty string when the
    /// list has no element at that position (the secondary lists can be shorter than the name list).
    /// </summary>
    private static string GetTextOrEmpty(IReadOnlyList<IWebElement> elements, int index)
    {
        return index < elements.Count ? elements[index].Text : "";
    }

    /// <summary>
    /// Splits <c>model.name</c> on spaces and fills the first, middle and last name members.
    /// One token sets only the first name; two tokens set first and last; with three or more,
    /// everything between the first and the last token becomes the middle name.
    /// </summary>
    private static void AssignNameParts(LifeAgentModel model)
    {
        // RemoveEmptyEntries keeps consecutive spaces from producing empty name tokens.
        var parts = model.name.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length == 0)
        {
            return;
        }

        model.firstname = parts[0];
        if (parts.Length == 1)
        {
            // Single-word name: there is no last name to assign.
            return;
        }

        model.lastname = parts[parts.Length - 1];
        if (parts.Length > 2)
        {
            model.middlename = string.Join(" ", parts, 1, parts.Length - 2);
        }
    }

 

Last modified: February 21, 2019

Author