2021年2月6日星期六

C#爬虫-1688官网自动登录

背景

在1688官网里面有很多信息是需要登录才能看得到的,比如商家的联系电话等等。那么我们在抓取它的网页的时候,肯定是需要维持登录状态才能得到对应的内容。这里面就会涉及到自动登录的问题。

登录地址

https://login.1688.com/member/signin.htm

图片

自动登录方法

1、找到对应的元素,账号、密码框。

图片

2、把账号、密码值带进去。

 // Load the account pool from disk. Each entry's AccountPassword is used as
 // "<account>+<password>": the part before '+' goes into the login-id field and
 // the part after it into the password field.
 string file = "1688Account.json";
 if (!File.Exists(file))
 {
     throw new ArgumentException("1688Account not found");
 }

 string data = File.ReadAllText(file, Encoding.UTF8);
 var account1688list = JsonConvert.DeserializeObject<List<Account1688Item>>(data);

 // DeserializeObject yields null for an empty or literal "null" document; treat
 // that like an empty pool instead of failing with a NullReferenceException.
 List<string> logininfolist = (account1688list ?? new List<Account1688Item>())
     .Where(o => o != null && !string.IsNullOrWhiteSpace(o.AccountPassword))
     .Select(o => o.AccountPassword)
     .ToList();
 if (logininfolist.Count == 0)
 {
     throw new InvalidOperationException("1688Account.json does not contain any usable account");
 }

 // Pick one account at random from the pool.
 Random rdinfo = new Random();
 int indexinfo = rdinfo.Next(logininfolist.Count);
 string modelinfo = logininfolist[indexinfo];

 // Split on the FIRST '+' only, so a password that itself contains '+' is kept
 // intact instead of being truncated at its first '+'.
 string[] credentialParts = modelinfo.Split(new[] { '+' }, 2);
 if (credentialParts.Length != 2)
 {
     throw new FormatException("1688Account entry must be in the form account+password");
 }

 driver.FindElement(By.Id("fm-login-id")).SendKeys(credentialParts[0]);
 driver.FindElement(By.Id("fm-login-password")).SendKeys(credentialParts[1]);

3、模拟点击提交按钮操作。

  // Thread.Sleep(1000 * 30); // 30-second pause (disabled)

  // Submit the form by clicking the element with class "password-login".
  var submitButton = driver.FindElement(By.ClassName("password-login"));
  submitButton.Click();

  // Block for 30 seconds after the click.
  // NOTE(review): presumably this leaves time for the page to finish signing in
  // or for a manual verification step - confirm.
  Thread.Sleep(TimeSpan.FromSeconds(30));

4、进入控制台,验证是否登录成功,并记录Cookies。下次再来的时候,直接使用现成的Cookies,有效期一般可以维持一天多。

 // Open the work console page and give it 5 seconds to settle.
 const string workConsoleUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
 driver.Navigate().GoToUrl(workConsoleUrl);
 Thread.Sleep(TimeSpan.FromSeconds(5));

 // If the browser is on the login host at this point, the sign-in did not stick:
 // drop any stored cookies and signal the caller to log in again.
 bool landedOnLoginPage = driver.Url.Contains("login.1688.com");
 if (landedOnLoginPage)
 {
     Console.WriteLine("登录失败");
     CookieHelp.DeleteCookies();
     Console.WriteLine("2");
     throw new Exception("重新登录");
 }

 // Signed in: refresh once, then hand the browser's current cookies to CookieHelp
 // so a later run can reuse them.
 driver.Navigate().Refresh();
 var sessionCookies = driver.Manage().Cookies.AllCookies;
 CookieHelp.WriteCookies(sessionCookies);

5、判断是否有现成的登录Cookies(在完整代码中,这一步实际最先执行:有现成的Cookies就直接复用,没有才走上面的登录流程)。

 driver.Navigate().GoToUrl("https://www.1688.com/");   driver.Manage().Cookies.DeleteAllCookies();   var listCookie = CookieHelp.GetCookie();   if (listCookie != null)   {    logintry = 0;    Console.WriteLine("有现成cookies" + DateTime.UtcNow);    foreach (var item in listCookie)    {     driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));    }    Thread.Sleep(2000);    driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");    Thread.Sleep(1000 * 2);

完整代码

 /// <summary>  /// 登录 todo  /// </summary>  /// <param name="_reptilesImageSearchService"></param>  /// <param name="options"></param>  /// <param name="driver"></param>  public void Implement(IReptilesImageSearchService _reptilesImageSearchService, IWebDriver driver)  {   driver.Navigate().GoToUrl("https://www.1688.com/");   driver.Manage().Cookies.DeleteAllCookies();   var listCookie = CookieHelp.GetCookie();   if (listCookie != null)   {    logintry = 0;    Console.WriteLine("有现成cookies" + DateTime.UtcNow);    foreach (var item in listCookie)    {     driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));    }    Thread.Sleep(2000);    driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");    Thread.Sleep(1000 * 2);    if (driver.Url.Contains("login.1688.com"))    {     Console.WriteLine("cookies过期了");     CookieHelp.DeleteCookies();     Console.WriteLine("1");     throw new Exception("重新登录");    }   }   else   {    if (logintry > 4)    {     Console.WriteLine("登陆次数超出:" + logintry);     throw new Exception("登陆次数超出,退出");    }    logintry++;    Console.WriteLine("无现成cookies" + DateTime.UtcNow);    driver.Navigate().GoToUrl("https://login.1688.com/member/signin.htm");    #region 登录动作    driver.SwitchTo().Frame(0);    IJavaScriptExecutor js = (IJavaScriptExecutor)driver;    // string returnjs = (string)js.ExecuteScript("Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});");    string jsfile = File.ReadAllText("stealth.min.js", Encoding.UTF8);    string returnjs = (string)js.ExecuteScript(jsfile);    List<string> logininfolist = new List<string>();    string file = "1688Account.json";    if (!File.Exists(file))    {     throw new ArgumentException("1688Account not found");    }    string data = File.ReadAllText(file, Encoding.UTF8);    var account1688list = JsonConvert.DeserializeObject<List<Account1688Item>>(data);    
logininfolist.AddRange(account1688list.Select(o => o.AccountPassword).ToList());    Random rdinfo = new Random();    int indexinfo = rdinfo.Next(logininfolist.Count);    var modelinfo = logininfolist[indexinfo];    driver.FindElement(By.Id("fm-login-id")).SendKeys(modelinfo.Split('+')[0]);    driver.FindElement(By.Id("fm-login-password")).SendKeys(modelinfo.Split('+')[1]);    // Thread.Sleep(1000 * 30); //30时间操作    driver.FindElement(By.ClassName("password-login")).Click();    Thread.Sleep(1000 * 30);//30时间操作    

没有评论:

发表评论