Selenium Based Scraping with .NET

A script I wrote to scrape agent information from Life Happens for a project.

Scraping was done for different zip codes.

Used C# .NET and Selenium on the project.


/// <summary>
/// One agent entry read from the agent-locator results page.
/// All values are plain strings taken from the page text or derived from it.
/// </summary>
public class LifeAgentModel
{
// Full display name as read from the page; carries [JsonIgnore], so it is meant to be
// left out of JSON output while the split first/middle/last parts below are kept.
[JsonIgnore]
public string name { get; set; }
// Zip code that was searched for when this agent was found.
public string zipCode { get; set; }
// Text of the <span> nested inside the agent's name heading ("" when there is none).
public string detail { get; set; }
// Text of the <span> directly under the agent-info block.
public string company { get; set; }
// Text of the agent's "tel" block.
public string phone { get; set; }
// Text of the agent's link element; the scraper stores "..." when that text is shorter than 2 characters.
public string address { get; set; }
// First space-separated token of name.
public string firstname { get; set; }
// Trailing token of name.
public string lastname { get; set; }
// Token between first and last name; stays unset when the name has no middle part.
public string middlename { get; set; }
}

 

/// <summary>
/// Extension helpers for <see cref="IWebDriver"/>.
/// </summary>
public static class WebDriverExtensions
{
    /// <summary>
    /// Looks up a single element, optionally retrying through a <see cref="WebDriverWait"/>.
    /// </summary>
    /// <param name="driver">Driver to search with.</param>
    /// <param name="by">Locator of the wanted element.</param>
    /// <param name="timeoutInSeconds">
    /// Maximum number of seconds to keep retrying; zero or a negative value performs one
    /// direct lookup with no explicit wait.
    /// </param>
    /// <returns>The element located by <paramref name="by"/>.</returns>
    public static IWebElement FindElement(this IWebDriver driver, By by, int timeoutInSeconds)
    {
        // No positive timeout requested: fall straight through to the driver's own lookup.
        if (timeoutInSeconds <= 0)
        {
            return driver.FindElement(by);
        }

        var limit = TimeSpan.FromSeconds(timeoutInSeconds);
        var poller = new WebDriverWait(driver, limit);
        return poller.Until(d => d.FindElement(by));
    }
}

public class LifeHappensScraper
{
/// <summary>
/// Searches the Life Happens agent locator for <paramref name="zipCode"/>, expands the
/// client-side pagination, and builds a <see cref="LifeAgentModel"/> for every listed
/// agent whose name is not blank.
/// </summary>
/// <param name="driver">Chrome driver used to drive the page. Its implicit wait is set to 3 seconds.</param>
/// <param name="zipCode">Zip code typed into the search form and copied onto each model.</param>
/// <exception cref="NoSuchElementException">A search-form element could not be found.</exception>
/// <exception cref="WebDriverTimeoutException">The results container did not appear in time.</exception>
static void ScrapeAndAddItems(ChromeDriver driver, string zipCode)
{
    // Upper bound on pagination clicks so a link that never goes away cannot loop forever.
    const int maxLoadMoreClicks = 100;
    var lookupTimeout = TimeSpan.FromSeconds(3);
    var agentLocator = By.XPath("//dt");

    var lsResult = new List<LifeAgentModel>();
    driver.Navigate().GoToUrl("https://lifehappens.org/agent-locator/");

    var input = driver.FindElement(By.XPath("//form[@class='zip']/input[@name='zip']"));
    input.Click();
    input.SendKeys(zipCode);

    var input2 = driver.FindElement(By.XPath("//form[@class='zip']/div/input[@id='a']"));
    input2.Click();

    var input3 = driver.FindElement(By.XPath("//form[@class='zip']/input[@name='SubmitZip']"));
    input3.Click();

    // ImplicitWait is a lookup timeout, not a pause, so it is configured once here
    // rather than re-assigned after each click.
    driver.Manage().Timeouts().ImplicitWait = lookupTimeout;

    // Block until the results container exists before touching the pagination link.
    driver.FindElement(By.XPath("//div[@id='result-1']"), 1);

    // Expand the client-side pagination BEFORE reading any agent, so entries added by
    // "load more" are included. FindElements returns an empty list instead of throwing
    // when the link is absent (e.g. a short result list).
    for (var click = 0; click < maxLoadMoreClicks; click++)
    {
        var loadMoreLinks = driver.FindElements(By.XPath("//a[@class='load-more']"));
        if (loadMoreLinks.Count == 0 || !loadMoreLinks[0].Displayed)
        {
            break;
        }

        var countBefore = driver.FindElements(agentLocator).Count;
        loadMoreLinks[0].Click();

        try
        {
            // Wait for the click to actually add entries instead of assuming a fixed delay.
            new WebDriverWait(driver, lookupTimeout)
                .Until(drv => drv.FindElements(agentLocator).Count > countBefore);
        }
        catch (WebDriverTimeoutException)
        {
            // Nothing new arrived: treat the list as fully loaded.
            break;
        }
    }

    // An XPath that starts with "//" is evaluated against the whole document even when it
    // is run from an element, so the agent entries are queried from the driver directly.
    var agents = driver.FindElements(agentLocator);

    // Everything is loaded now; drop the implicit wait so an agent that lacks an optional
    // field does not stall each lookup for the full timeout.
    driver.Manage().Timeouts().ImplicitWait = TimeSpan.Zero;
    try
    {
        foreach (var agent in agents)
        {
            // Fields are read relative to this agent's own <dt>, so a missing field on one
            // agent cannot shift the values of the agents that follow it.
            var addressText = ReadFirstText(agent, "./a");

            var model = new LifeAgentModel()
            {
                name = ReadFirstText(agent, "./div[@class='agent-info']/h3[@class='name']"),
                detail = ReadFirstText(agent, "./div[@class='agent-info']/h3[@class='name']/span"),
                company = ReadFirstText(agent, "./div[@class='agent-info']/span"),
                address = addressText.Length < 2 ? "..." : addressText,
                phone = ReadFirstText(agent, "./div[@class='tel']"),
                zipCode = zipCode
            };

            // NOTE(review): the heading text may also contain the nested detail <span>
            // text, which would then show up as a name token — confirm against the page.
            var nameParts = model.name.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (nameParts.Length == 0)
            {
                // Blank name: skip the entry.
                continue;
            }

            model.firstname = nameParts[0];

            if (nameParts.Length > 1)
            {
                // Last token is the surname; a single-token name leaves lastname unset
                // instead of indexing past the end of the array.
                model.lastname = nameParts[nameParts.Length - 1];
            }

            if (nameParts.Length > 2)
            {
                // Everything between the first and last token is the middle name.
                model.middlename = string.Join(" ", nameParts, 1, nameParts.Length - 2);
            }

            lsResult.Add(model);
        }
    }
    finally
    {
        driver.Manage().Timeouts().ImplicitWait = lookupTimeout;
    }

    // NOTE(review): lsResult is filled but not consumed in the code visible here —
    // confirm where the collected agents are meant to be stored or returned.
}

/// <summary>
/// Returns the text of the first element matching <paramref name="xpath"/> under
/// <paramref name="context"/>, or an empty string when nothing matches.
/// </summary>
private static string ReadFirstText(IWebElement context, string xpath)
{
    var matches = context.FindElements(By.XPath(xpath));
    return matches.Count > 0 ? matches[0].Text : "";
}