Search results for 'IdHTTP'. 1 post(s) found.

  1. 2007/09/18 Retrieving all image links from an HTML document
2007/09/18 08:34

Retrieving all image links from an HTML document


Here's how to obtain all image links from an HTML document. The GetImageLinks procedure fills a TStrings object with the value of the SRC property of the IMG HTML element.
Note: if the images have relative links, you will have to parse the document url and prepend that to the image src.

Tip submitted by "mrbaseball34" (an About Delphi Programming member).

uses ... mshtml, ActiveX, COMObj, IdHTTP, idURI;

procedure GetImageLinks(AURL: String; AList: TStrings) ;
var
  IDoc : IHTMLDocument2;
  strHTML : String;
  v : Variant;
  x : integer;
  ovLinks : OleVariant;
  DocURL : String;
  URI : TidURI;
  ImgURL : String;
  IdHTTP : TidHTTP;
begin
  AList.Clear;
  URI := TidURI.Create(AURL) ;
  try
    DocURL := 'http://' + URI.Host;
    if URI.Path <> '/' then
      DocURL := DocURL + URI.Path;
  finally
    URI.Free;
  end;
  Idoc:=CreateComObject(Class_HTMLDOcument) as IHTMLDocument2;
  try
    IDoc.designMode:='on';
    while IDoc.readyState<>'complete' do
      Application.ProcessMessages;
    v:=VarArrayCreate([0,0],VarVariant) ;
    IdHTTP := TidHTTP.Create(nil) ;
    try
      strHTML := IdHTTP.Get(AURL) ;
    finally
      IdHTTP.Free;
    end;
    v[0]:= strHTML;
    IDoc.write(PSafeArray(System.TVarData(v).VArray)) ;
    IDoc.designMode:='off';
    while IDoc.readyState<>'complete' do
      Application.ProcessMessages;
    ovLinks := IDoc.all.tags('IMG') ;
    if ovLinks.Length > 0 then
    begin
      for x := 0 to ovLinks.Length-1 do
      begin
        ImgURL := ovLinks.Item(x).src;
        // The stuff below will probably need a little tweaking
        // Deteriming and turning realtive URLs into absolute URLs
        // is not that difficult but this is all I could come up with
        // in such a short notice.
        if (ImgURL[1] = '/') then
        begin
          // more than likely a relative URL so
          // append the DocURL
          ImgURL := DocURL + ImgUrl;
        end
        else
        begin
          if (Copy(ImgURL, 1, 11) = 'about:blank') then
          begin
            ImgURL := DocURL + Copy(ImgUrl, 12, Length(ImgURL)) ;
          end;
        end;
        AList.Add(ImgURL) ;
      end;
    end;
  finally
    IDoc := nil;
  end;
end;

//Usage
procedure TForm1.Button1Click(Sender: TObject) ;
begin
  GetImageLinks('http://kurapa.com', Memo1.Lines) ;
end;
Trackback 3 Comment 0

Trackback : Cannot send a trackbact to this post.

  1. Subject different money making ideas

    Tracked from moneyideas 2010/01/28 23:05 delete

    moneyideas

  2. Subject different money making ideas

    Tracked from moneyideas 2010/01/29 07:09 delete

    moneyideas

  3. Subject different money making ideas

    Tracked from moneyideas 2010/01/31 16:40 delete

    moneyideas