使用C#处理WebBrowser控件在不同域名中的跨域问题

  我们在做web测试时,经常会使用WebBrowser来进行一些自动化的任务。而有些网页上面会用IFrame去嵌套别的页面,这些页面可能不是在相同域名下的,这时就会出现跨域问题,无法直接在WebBrowser中获取到IFrame中的元素。下面来做个试验,自己写个页面嵌套一个百度的首页,然后在我们自己的页面上输入要查询的词,最后在百度上自动完成搜索。

  

复制代码 代码如下:

  <!DOCTYPE html>
  <!-- Host page for the cross-domain experiment: it embeds a page from another domain in an
       iframe and offers a text box whose value the WinForms test code copies into that frame. -->
  <html lang="zh-CN" xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <meta charset="utf-8" />
    <title>WebBrowser 跨域测试</title>
  </head>
  <body>
    <!-- The only frame on the page; the C# test code reaches it as Frames[0]. -->
    <iframe id="baidu" title="Baidu" style="float:left;" width="500" height="500" src="http://www.baidu.com"></iframe>
    <div>
      <!-- The C# test code reads this input through GetElementById("search"). -->
      <label for="search">测试值:</label><input id="search" type="text" />
    </div>
  </body>
  </html>

  下面再建一个简单的WinForm工程测试一下,界面如下:

使用C#处理WebBrowser控件在不同域名中的跨域问题

  下面就是WebBrowser的测试代码:

  

复制代码 代码如下:

  using System;

  using System.Windows.Forms;

  namespace WebBrowserTest

  {

  /// <summary>
  /// Test form for the naive approach: the frame's document is reached directly through the
  /// managed <c>HtmlWindow.Document</c> property. The surrounding article reports that this
  /// fails with an unhandled <see cref="UnauthorizedAccessException"/> when the frame hosts a
  /// page from another domain.
  /// </summary>
  public partial class Form1 : Form
  {
      public Form1()
      {
          InitializeComponent();
      }

      /// <summary>Navigates the embedded browser to the address typed into textBox1.</summary>
      private void button1_Click(object sender, EventArgs e)
      {
          string address = this.textBox1.Text;
          this.webBrowser1.Navigate(address);
      }

      /// <summary>
      /// Copies the value of the host page's "search" input into the "kw" element of the first
      /// frame and then clicks that frame's "su" element.
      /// </summary>
      private void button2_Click(object sender, EventArgs e)
      {
          HtmlDocument hostPage = this.webBrowser1.Document;
          HtmlWindowCollection childFrames = hostPage.Window.Frames;
          string keyword = hostPage.GetElementById("search").GetAttribute("value");

          // Reaching into the frame's document is the cross-domain access under test.
          HtmlElement queryBox = childFrames[0].Document.GetElementById("kw");
          queryBox.SetAttribute("value", keyword);

          HtmlElement submitButton = childFrames[0].Document.GetElementById("su");
          submitButton.InvokeMember("click");
      }
  }

  }

  运行测试程序,加载前面我们自己写的页面,在页面上输入要查询的词,然后点击测试按钮,就会看到程序抛出未处理的 UnauthorizedAccessException 异常:

使用C#处理WebBrowser控件在不同域名中的跨域问题

  下面来编写一个Helper类来解决这个问题,主要原理大致就是利用IWebBrowser2这个接口来获取IFrame中的DOM:IWebBrowser2中的Document可以转换为IHTMLDocument、IHTMLDocument2或IHTMLDocument3。

  

复制代码 代码如下:

  using System;

  using System.Runtime.InteropServices;

  using System.Windows.Forms;

  using mshtml;

  namespace WebBrowserTest

  {

  // This is the COM IServiceProvider interface, not System.IServiceProvider .Net interface!
  // It is imported by its interface identifier (the Guid below) and declared as an
  // IUnknown-based interface. Because it lives in this namespace under the same simple name
  // as the .NET type, code inside the namespace that says "IServiceProvider" gets this one.

  [ComImport(), ComVisible(true), Guid("6D5140C1-7436-11CE-8034-00AA006009FA"),

  InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]

  public interface IServiceProvider

  {

  // Asks the object for the service identified by guidService and returns, through
  // ppvObject, the interface identified by riid on that service.
  // [PreserveSig] means the raw 32-bit result code is handed back as the return value
  // instead of being turned into an exception, so callers must inspect it themselves.
  [return: MarshalAs(UnmanagedType.I4)]

  [PreserveSig]

  int QueryService(ref Guid guidService, ref Guid riid, [MarshalAs(UnmanagedType.Interface)] out object ppvObject);

  }

  /// <summary>
  /// Status bits returned by <c>IWebBrowser2.QueryStatusWB</c>. Every member is a distinct
  /// single bit, so values can be combined; <see cref="FlagsAttribute"/> makes a combined
  /// value format as a list of member names instead of a bare number.
  /// </summary>
  [Flags]
  public enum OLECMDF
  {
      OLECMDF_SUPPORTED = 0x01,
      OLECMDF_ENABLED = 0x02,
      OLECMDF_LATCHED = 0x04,
      OLECMDF_NINCHED = 0x08,
      OLECMDF_INVISIBLE = 0x10,
      OLECMDF_DEFHIDEONCTXTMENU = 0x20
  }

  /// <summary>
  /// Command identifiers accepted by <c>IWebBrowser2.QueryStatusWB</c> and
  /// <c>IWebBrowser2.ExecWB</c>. Only the five commands listed here are declared;
  /// members are listed in ascending numeric order.
  /// </summary>
  public enum OLECMDID
  {
      OLECMDID_SAVEAS = 4,
      OLECMDID_PRINT = 6,
      OLECMDID_PRINTPREVIEW = 7,
      OLECMDID_PAGESETUP = 8,
      OLECMDID_PROPERTIES = 10
  }

  /// <summary>
  /// Execution option passed as the second argument of <c>IWebBrowser2.ExecWB</c>.
  /// The numeric values are spelled out; they are the same consecutive values (0 to 3)
  /// the members get when declared without initializers.
  /// </summary>
  public enum OLECMDEXECOPT
  {
      OLECMDEXECOPT_DODEFAULT = 0,
      OLECMDEXECOPT_PROMPTUSER = 1,
      OLECMDEXECOPT_DONTPROMPTUSER = 2,
      OLECMDEXECOPT_SHOWHELP = 3
  }

  // Managed declaration of the COM IWebBrowser2 interface, imported by the interface
  // identifier given in the Guid attribute. In the code shown in this file the only member
  // actually used is the Document property (read by CorssDomainHelper.GetDocumentFromWindow).
  //
  // NOTE(review): for a [ComImport] interface the order of the members and their DispId
  // values are presumably part of the binary contract with the COM object - do not reorder,
  // remove or renumber members without confirming against the official interface definition.
  //
  // The DispIds are written in a mix of decimal and hexadecimal; within each group they
  // are consecutive (100-106, 200-212, 300-303, 400-407, 500-503, 550-556), plus a few
  // negative ids (-550, -515, -525) and 0.
  [ComImport, Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E"), TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]

  public interface IWebBrowser2

  {

  // ---- DispIds 100-106 (and -550 for Refresh): navigation commands ----
  [DispId(100)]

  void GoBack();

  [DispId(0x65)]

  void GoForward();

  [DispId(0x66)]

  void GoHome();

  [DispId(0x67)]

  void GoSearch();

  [DispId(0x68)]

  void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

  [DispId(-550)]

  void Refresh();

  [DispId(0x69)]

  void Refresh2([In] ref object level);

  [DispId(0x6a)]

  void Stop();

  // ---- DispIds 200-212: object-valued and informational properties ----
  // The four object-valued properties below are marshalled as IDispatch.
  [DispId(200)]

  object Application { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

  [DispId(0xc9)]

  object Parent { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

  [DispId(0xca)]

  object Container { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

  // The property the cross-domain helper relies on: it casts the returned object to
  // IHTMLDocument3 to reach the frame's DOM.
  [DispId(0xcb)]

  object Document { [return: MarshalAs(UnmanagedType.IDispatch)] get; }

  [DispId(0xcc)]

  bool TopLevelContainer { get; }

  [DispId(0xcd)]

  string Type { get; }

  [DispId(0xce)]

  int Left { get; set; }

  [DispId(0xcf)]

  int Top { get; set; }

  [DispId(0xd0)]

  int Width { get; set; }

  [DispId(0xd1)]

  int Height { get; set; }

  [DispId(210)]

  string LocationName { get; }

  [DispId(0xd3)]

  string LocationURL { get; }

  [DispId(0xd4)]

  bool Busy { get; }

  // ---- DispIds 300-303 ----
  [DispId(300)]

  void Quit();

  [DispId(0x12d)]

  void ClientToWindow(out int pcx, out int pcy);

  [DispId(0x12e)]

  void PutProperty([In] string property, [In] object vtValue);

  [DispId(0x12f)]

  object GetProperty([In] string property);

  [DispId(0)]

  string Name { get; }

  [DispId(-515)]

  int HWND { get; }

  // ---- DispIds 400-407 ----
  [DispId(400)]

  string FullName { get; }

  [DispId(0x191)]

  string Path { get; }

  [DispId(0x192)]

  bool Visible { get; set; }

  [DispId(0x193)]

  bool StatusBar { get; set; }

  [DispId(0x194)]

  string StatusText { get; set; }

  // Declared as int, unlike the bool-typed properties around it.
  [DispId(0x195)]

  int ToolBar { get; set; }

  [DispId(0x196)]

  bool MenuBar { get; set; }

  [DispId(0x197)]

  bool FullScreen { get; set; }

  // ---- DispIds 500-503 ----
  [DispId(500)]

  void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);

  // QueryStatusWB and ExecWB are typed with the OLECMDF / OLECMDID / OLECMDEXECOPT enums
  // declared earlier in this namespace.
  [DispId(0x1f5)]

  OLECMDF QueryStatusWB([In] OLECMDID cmdID);

  [DispId(0x1f6)]

  void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);

  [DispId(0x1f7)]

  void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);

  // WebBrowserReadyState is not declared in this file; presumably the enum from
  // System.Windows.Forms, which this file imports.
  [DispId(-525)]

  WebBrowserReadyState ReadyState { get; }

  // ---- DispIds 550-556: boolean options ----
  [DispId(550)]

  bool Offline { get; set; }

  [DispId(0x227)]

  bool Silent { get; set; }

  [DispId(0x228)]

  bool RegisterAsBrowser { get; set; }

  [DispId(0x229)]

  bool RegisterAsDropTarget { get; set; }

  [DispId(0x22a)]

  bool TheaterMode { get; set; }

  [DispId(0x22b)]

  bool AddressBar { get; set; }

  [DispId(0x22c)]

  bool Resizable { get; set; }

  }

  /// <summary>
  /// Helper for reading the DOM of a frame whose document comes from a different domain than
  /// the page that hosts it. The direct <c>IHTMLWindow2.document</c> access is tried first; if
  /// that is denied, the frame window is asked (through the COM IServiceProvider declared in
  /// this namespace) for its IWebBrowser2 object, whose Document property is returned instead.
  /// </summary>
  class CorssDomainHelper
  {
      // Passed to QueryService as the service identifier.
      private static readonly Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");

      // Passed to QueryService as the requested interface; same GUID as the one on IWebBrowser2.
      private static readonly Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

      /// <summary>
      /// Utility for IE cross domain access: gets the document shown in a (frame) window.
      /// </summary>
      /// <param name="htmlWindow">The frame window; may be null.</param>
      /// <returns>
      /// The window's document as <c>IHTMLDocument3</c>, or null when
      /// <paramref name="htmlWindow"/> is null or the document cannot be obtained.
      /// This method does not throw; fallback failures are written to the console.
      /// </returns>
      public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
      {
          if (htmlWindow == null)
          {
              return null;
          }

          // First try the usual way to get the document.
          try
          {
              return (IHTMLDocument3)htmlWindow.document;
          }
          catch (COMException)
          {
              // Probably never raised here, but treated like an access-denied failure:
              // fall through to the IWebBrowser2 route below.
          }
          catch (UnauthorizedAccessException)
          {
              // E_ACCESSDENIED: the frame contains a document from another domain and IE
              // blocks cross-frame scripting. Fall through to the IWebBrowser2 route below.
          }
          catch (Exception)
          {
              // Any other failure is not a cross-domain problem; give up.
              return null;
          }

          try
          {
              // Convert IHTMLWindow2 to IWebBrowser2 using the COM IServiceProvider.
              IServiceProvider serviceProvider = htmlWindow as IServiceProvider;
              if (serviceProvider == null)
              {
                  return null;
              }

              // QueryService takes the GUIDs by reference, so hand it local copies and keep
              // the shared static fields immutable.
              Guid serviceId = IID_IWebBrowserApp;
              Guid interfaceId = IID_IWebBrowser2;
              object browserObject;

              // QueryService is declared with [PreserveSig]: failure is reported through the
              // returned HRESULT (negative value), not through an exception.
              int hr = serviceProvider.QueryService(ref serviceId, ref interfaceId, out browserObject);
              if (hr < 0)
              {
                  Console.WriteLine("IServiceProvider.QueryService failed, HRESULT = 0x{0:X8}", hr);
                  return null;
              }

              IWebBrowser2 browser = browserObject as IWebBrowser2;
              if (browser == null)
              {
                  return null;
              }

              // Get the document from IWebBrowser2; null if it is not an HTML document.
              return browser.Document as IHTMLDocument3;
          }
          catch (Exception ex)
          {
              Console.WriteLine(ex);
          }

          return null;
      }
  }

  }

  最后将我们的运行代码改为如下形式,调用Helper类中的GetDocumentFromWindow方法:

  

复制代码 代码如下:

  using System;

  using System.Windows.Forms;

  using mshtml;

  namespace WebBrowserTest

  {

  /// <summary>
  /// Test form for the cross-domain approach: the frame's document is obtained through
  /// <c>CorssDomainHelper.GetDocumentFromWindow</c> instead of the managed
  /// <c>HtmlWindow.Document</c> property.
  /// </summary>
  public partial class Form1 : Form
  {
      public Form1()
      {
          InitializeComponent();
      }

      /// <summary>Navigates the embedded browser to the address typed into textBox1.</summary>
      private void button1_Click(object sender, EventArgs e)
      {
          this.webBrowser1.Navigate(this.textBox1.Text);
      }

      /// <summary>
      /// Copies the value of the host page's "search" input into the "kw" element of the first
      /// frame and clicks that frame's "su" element. Every lookup is checked, because each of
      /// them can legitimately come back empty (no page loaded yet, page without frames,
      /// helper unable to reach the frame, element ids changed on the remote page).
      /// </summary>
      private void button2_Click(object sender, EventArgs e)
      {
          HtmlDocument doc = this.webBrowser1.Document;
          if (doc == null || doc.Window == null)
          {
              ReportFailure("No page is loaded in the browser yet.");
              return;
          }

          HtmlElement searchInput = doc.GetElementById("search");
          if (searchInput == null)
          {
              ReportFailure("The loaded page has no element with id \"search\".");
              return;
          }

          HtmlWindowCollection frames = doc.Window.Frames;
          if (frames == null || frames.Count == 0)
          {
              ReportFailure("The loaded page does not contain any frame.");
              return;
          }

          String testValue = searchInput.GetAttribute("value");

          // The helper returns null on failure (and for a null window), so check before use.
          IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(frames[0].DomWindow as IHTMLWindow2);
          if (baiduDoc == null)
          {
              ReportFailure("The document of the first frame could not be accessed.");
              return;
          }

          IHTMLElement keywordBox = baiduDoc.getElementById("kw");
          IHTMLElement submitButton = baiduDoc.getElementById("su");
          if (keywordBox == null || submitButton == null)
          {
              ReportFailure("The framed page has no \"kw\" or \"su\" element (is it fully loaded?).");
              return;
          }

          keywordBox.setAttribute("value", testValue);
          submitButton.click();
      }

      // Tells the user why the test action was skipped instead of crashing the form.
      private void ReportFailure(string message)
      {
          MessageBox.Show(this, message);
      }
  }

  }

  最后运行一下程序可以看到我们可以正常获取到百度上的元素了。

使用C#处理WebBrowser控件在不同域名中的跨域问题

  补充一下路过秋天说的问题

  关于这些接口,其实我也没有很深入地研究过,不过网上能搜到很多介绍这些接口之间区别的资料,我这里给一个链接:

  http://hi.baidu.com/christole/item/1c8dfd1a791a53643f87ced8

  至于上面的代码为什么要使用IHTMLDocument3而不是其它两个接口,是因为IHTMLDocument3这个接口里面定义了我需要的getElementById这个方法。

  通过查看MSDN,你可以找到你需要的属性或者方法,然后直接在代码里面转换为你需要的类型使用就可以了,它们之间都是可以互相转换的。比如上面我用完了getElementById方法之后,需要查看网页的title,那么可以将上面的baiduDoc变量强制转换为IHTMLDocument2,然后就可以直接使用它的title属性了。

  参考链接

  http://msdn.microsoft.com/en-us/library/aa752052(v=vs.85).aspx

  http://codecentrix.blogspot.com/2007/10/when-ihtmlwindow2getdocument-returns.html

  http://msdn.microsoft.com/en-us/library/aa752641(v=VS.85).aspx