[重点]delphi 实现 根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、副题、作者和正文。

[重点]delphi 实现 根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、副题、作者和正文。项目要求:根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、作者和正文。

大家好,又见面了,我是你们的朋友全栈君。

项目要求:根据给定的标题去《中国青年报》网上电子报数据中查找匹配的内容,并从该内容中取出引题、正题、作者和正文。


unit Unit1;

interface

uses
  Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
  Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, IdBaseComponent,
  IdComponent, IdTCPConnection, IdTCPClient, IdHTTP, Vcl.ComCtrls;

type
  TForm1 = class(TForm)
    IdHTTP1: TIdHTTP;
    Button1: TButton;
    Label1: TLabel;
    Edit1: TEdit;
    ProgressBar1: TProgressBar;
    Memo1: TMemo;
    Button2: TButton;
    Memo2: TMemo;
    procedure Button1Click(Sender: TObject);
    procedure FormCreate(Sender: TObject);
    procedure Button2Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation
       uses StrUtils,HttpApp;
{$R *.dfm}

type
  TDelFlags = set of (dfDelBefore, dfDelAfter);


function Delstr(var ms: String; endstr: String; Flags: TDelFlags;
  bself: Boolean = True): String;
var
  l: Integer;
begin
  l := length(endstr);
  if dfDelBefore in Flags then
  begin
    if bself then
    begin
      Result := copy(ms, 1, pos(endstr, ms) + l - 1);
      Delete(ms, 1, pos(endstr, ms) + l - 1);
    end
    else
    begin
      Result := copy(ms, 1, pos(endstr, ms) - 1);
      Delete(ms, 1, pos(endstr, ms) - 1);
    end;
  end
  else
  begin
    if bself then
    begin
      Result := copy(ms, pos(endstr, ms), length(ms));
      Delete(ms, pos(endstr, ms), length(ms));
    end
    else
    begin
      Result := copy(ms, pos(endstr, ms) + l, length(ms));
      Delete(ms, pos(endstr, ms) + l, length(ms));
    end;
  end;
end;

procedure DelstrEx(var ms: String; endstr: String;
  var DelData: String; Flags: TDelFlags; bself: Boolean = True);
var
  l: Integer;
begin
  l := length(endstr);
  if dfDelBefore in Flags then
  begin           //删除字符串的前半部分
    if bself then //连同自己一起删除
    begin
      DelData := copy(ms, 1, pos(endstr, ms) + l - 1);
      Delete(ms, 1, pos(endstr, ms) + l - 1);
    end
    else
    begin
      DelData := copy(ms, pos(endstr, ms) - 1, length(ms));
      Delete(ms, 1, pos(endstr, ms) - 1);
    end;
  end
  else
  begin
    if bself then
    begin
      DelData := copy(ms, pos(endstr, ms), length(ms));
      Delete(ms, pos(endstr, ms), length(ms)); //连同自己一起删除
    end
    else
    begin
      DelData := copy(ms, pos(endstr, ms) + l, length(ms));
      Delete(ms, pos(endstr, ms) + l, length(ms));
    end;
  end;
end; {DelstrEx}


function GetCenterStr(src, str1, str2: String): String;
var
  i, i2, i3: Integer;
begin
  i := 0;
  i2 := 0;
  i3 := 0;
  Delstr(src, str1, [dfDelBefore]);
  i := pos(AnsiLowercase(str1), AnsiLowercase(src));
  i3 := pos(AnsiLowercase(str2), AnsiLowercase(src));
  Result := copy(src, i2 + 1, i3 - i2 - 1);
end;


function delstrByNum(ss:string;uniqueFlag:string;disapperNum:integer;FromFlags: TDelFlags;bReturnDeletedPart:boolean):string;
var _num:integer;
    _Str:string;
begin
     _num:=0;
     _Str:=ss;

     result:='';

     while _num<disapperNum do
     begin
         if dfDelBefore in FromFlags then   //从字符串左端开始删除
         begin
            delstr(_Str,uniqueFlag,FromFlags);
         end
         else
         begin  //从字符串右端开始删除
           _Str:= StrUtils.ReverseString(_Str) ;

           if bReturnDeletedPart then
              delstrEx(_Str,StrUtils.ReverseString(uniqueFlag),result,[dfdelbefore])
           else
              delstr(_Str,StrUtils.ReverseString(uniqueFlag),[dfdelbefore]);

             _Str:= StrUtils.ReverseString(_Str) ;
         end;

          inc(_num);
     end;

     if result='' then result:=_Str
     else  result:= StrUtils.ReverseString(result) ;
end;




function Matchstrings(Source, pattern: String): Boolean;
var
  pSource: array[0..255] of Char;
  pPattern: array[0..255] of Char;
  function MatchPattern(element, pattern: PChar): Boolean;
    function IsPatternWild(pattern: PChar): Boolean;
    begin
      Result := StrScan(pattern, '*') <> nil;
      if not Result then
        Result := StrScan(pattern, '?') <> nil;
    end;
  begin
    if 0 = StrComp(pattern, '*') then
      Result := True
    else if (element^ = Chr(0)) and (pattern^ <> Chr(0)) then
      Result := False
    else if element^ = Chr(0) then
      Result := True
    else
    begin
      case pattern^ of
        '*':
          if MatchPattern(element, @pattern[1]) then
            Result := True
          else
            Result := MatchPattern(@element[1], pattern);
          '?':
          Result := MatchPattern(@element[1], @pattern[1]);
        else
          if element^ = pattern^ then
            Result := MatchPattern(@element[1], @pattern[1])
          else
            Result := False;
      end;
    end;
  end;
begin
  StrPCopy(pSource, Source);
  StrPCopy(pPattern, pattern);
  Result := MatchPattern(pSource, pPattern);
end; {匹配字符串函数}


{从磁盘中搜索指定类型的所有文件}
procedure FindFiles(ASourceDir, SearchFileType: String; var List: TStrings);
var
  FileRec: TSearchrec;
  Sour, OldFileName, NewFileName: String;
  fs: TFileStream;
begin
  Sour := ASourceDir;
  if Sour[length(Sour)] <> '\' then
    Sour := Sour + '\';
  if FindFirst(Sour + '*.*', faAnyfile, FileRec) = 0 then
    {循环}
    repeat
      if ((FileRec.Attr and faDirectory) <> 0) then
      begin
        if (FileRec.Name <> '.') and (FileRec.Name <> '..') then //找到目录
        begin
          FindFiles(Sour + FileRec.Name, SearchFileType, List);
        end;
      end
      else //找到文件
      begin
        if Matchstrings(LowerCase(FileRec.Name), SearchFileType) then
        begin
          List.Add(Sour + FileRec.Name);
        end; {拷贝所有类型的文件}
      end;
    until FindNext(FileRec) <> 0;
  system.SysUtils.FindClose(FileRec);
end; {从磁盘中搜索指定类型的所有文件}



procedure RmHtmlTags(var src: string);
  function DelTag(var src: string): boolean;
  var
    iPosS, iPosE: integer;
  begin
    result := False;
    if pos('<script', AnsiLowerCase(src)) > 0 then
      begin
        iPosS := pos('<script', AnsiLowerCase(src));
        if iPosS > 0 then
          begin
            iPosE := pos('</script>', AnsiLowerCase(src));
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 9);
          end;
      end
    else
      begin
        iPosS := pos('<', src);
        if iPosS > 0 then
          begin
            iPosE := pos('>', src);
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 1);
          end;
      end;
  end;
begin
  //src := LowerCase(src);
  src := src;
  repeat
  until not DelTag(src);
end;

procedure RmHtmlTagsEx(var src: string);
  function DelTag(var src: string): boolean;
  var
    iPosS, iPosE: integer;
  begin
    result := False;
    if pos('<script', AnsiLowerCase(src)) > 0 then
      begin
        iPosS := pos('<script', AnsiLowerCase(src));
        if iPosS > 0 then
          begin
            iPosE := pos('</script>', AnsiLowerCase(src));
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 9);
          end;
      end
    else
    if pos('<style', AnsiLowerCase(src)) > 0 then
      begin
        iPosS := pos('<style', AnsiLowerCase(src));
        if iPosS > 0 then
          begin
            iPosE := pos('</style>', AnsiLowerCase(src));
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 9);
          end;
      end
    else
      begin
       { iPosS := pos('<', src);
        if iPosS > 0 then
          begin
            iPosE := pos('>', src);
            result := iPosE > iPosS;
            if result then
              Delete(src, iPosS, iPosE - iPosS + 1);
          end; }
      end;
  end;
begin
  //src := LowerCase(src);
  src := src;
  repeat
  until not DelTag(src);
end;


function UrlDecoder(const AUrl:string):string;
begin
  result:= UTF8Decode(HttpDecode(AUrl));
end;

function UrlEncoder(const AUrl:string):string;
begin
//URL编码通常使用“+”来替换空格。
  result:=HttpEncode(UTF8Encode(AUrl));
end;


function  getResURL(http:TIdHttp;searchWord:string):string;
var info:tstringlist;
   res:tstringstream;
   tURL:string;
  MemoText: string;
begin
   http.HandleRedirects:=true;
   http.Request.UserAgent:='Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.2)';
   http.Request.Host:='search.cyol.com';
   http.Request.ContentType:='application/x-www-form-urlencoded';
   http.Request.Referer:='http://search.cyol.com/index.htm';
   http.request.CacheControl:='no-cache';
   http.HTTPOptions:=http.HTTPOptions+[hoKeepOrigProtocol];

   try
      info:=tstringlist.Create;
     res:=tstringstream.Create('',TEncoding.UTF8);

    {
      info.Add('op=new');
     info.Add('searchBtn=搜索');
     info.Add('searchText='+searchWord); //全站内模糊搜索
     // info.Add('searchText=一日为师 终身挨骂?');
    }
     info.Add('ak=');
     info.Add('ck=');
     info.Add('df=');
     info.Add('dt=');
     info.Add('nk=4');
     info.Add('od=date');
     info.Add('op=adv');
     info.Add('tk='+searchWord);

     tURL:='http://search.cyol.com/searchh.jsp';
     http.Post(tURL,info,res);
     MemoText:= res.DataString;

     delstr(MemoText,'resultdiv',[dfdelbefore]);

     //showmessage(MemoText);

     if pos('color:red',ansilowercase(MemoText))=0 then
     begin
          result:='';
          Exit;
     end;


     delstr(MemoText,'>',[dfdelbefore]);
     delstr(MemoText,'<a',[dfdelbefore]);
     delstr(MemoText,'http:',[dfdelbefore],false);
     delstr(MemoText,'.htm',[dfdelafter],false);


     result:=MemoText;


   finally
      freeandnil(info);
      freeandnil(res);
      //http.Free;
   end;
end;

function getHtmlStr(http:TIdHttp;fURL:string):string;
begin
   if assigned(http) and (http is TIdHttp) and (http<>nil) then
    result:=  http.Get(fURL);
end;



procedure TForm1.Button1Click(Sender: TObject);

var htmlText:string;
  biaoti: string;
  Author: string;
  yinti: string;
  table_Pos: Integer;
  ss: string;
  outdata: string;
  neirong: string;
  zhenwen: string;
  frontPart: string;
  subtitle: string;
  txtList: TStrings;
  i: Integer;
  readtxt: TStringList;
  zhenti: string;
  resURL: string;

begin
   button1.Caption:='正在处理'; button1.Enabled:=false;

 { htmlText:=  getHtmlStr(idHTTP1, getResURL(idHTTP1,'一日为师 终身挨骂?') );

  frontPart:=htmlText;

  delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
  delstr(frontPart,'/enpproperty',[dfdelafter]);

  Author:=  GetCenterStr(frontPart,'<author>','</author>');    //作者
  subtitle:=  GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
  yinti:=  GetCenterStr(frontPart,'<introtitle>','</introtitle>');  //引题


  //取正文
  zhenwen:=htmlText;
  delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
  delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);
  Memo1.Text:=zhenwen;

  }

  if not directoryExists(edit1.Text) then
  begin

     showmessage('请输入标引txt的路径!');
    exit;
  end;


  txtList:=tstringlist.Create ;
  readtxt:=TStringlist.Create ;
  findfiles(edit1.Text,'*.txt',txtList);

  ProgressBar1.Position:=0;
  ProgressBar1.Max:=txtlist.Count;



  try

  for i := 0 to txtList.Count-1 do
  begin
       application.ProcessMessages ;
       ProgressBar1.Position:=i+1;

       readtxt.LoadFromFile(txtList[i]);

        zhenti:=readtxt.Values['<主题>'];

        htmlText:='';  zhenwen:='';
        author:='';subtitle:=''; yinti:='';


        resURL:=getResURL(idHTTP1,trim(zhenti));

        if ''<>trim(resURL) then
        begin

            htmlText:=  getHtmlStr(idHTTP1,  resURL);

            frontPart:=htmlText;

            delstr(frontPart,'<!--enpproperty',[dfdelbefore]);
            delstr(frontPart,'/enpproperty',[dfdelafter]);

            Author:=  GetCenterStr(frontPart,'<author>','</author>');    //作者
            subtitle:=  GetCenterStr(frontPart,'<subtitle>','</subtitle>'); //副题
            yinti:=  GetCenterStr(frontPart,'<introtitle>','</introtitle>');  //引题

            //取正文
            zhenwen:=htmlText;
            delstr(zhenwen,'<!--enpcontent-->',[dfdelbefore]);
            delstr(zhenwen,'<!--/enpcontent-->',[dfdelafter]);

            RmHtmlTagsEx(zhenwen);

            if ''<>trim(yinti) then readtxt.Values['<引题>']:=yinti;
            if ''<>trim(subtitle) then readtxt.Values['<副题>']:=subtitle;
            if ''<>trim(author) then readtxt.Values['<作者>']:=author;
            if ''<>trim(zhenwen) then readtxt.Values['<正文>']:=slinebreak+trim(zhenwen);

            readtxt.SaveToFile(txtList[i]);

            readtxt.Clear ;
        end
        else
        begin
             Memo2.Lines.Add('未找到对应数据:'+txtList[i]);
        end;

  end; // for i end

  if ProgressBar1.Max=ProgressBar1.Position then
  begin
      showmessage('处理完成!');
  end;
  finally
     button1.Caption:='开始处理'; button1.Enabled:=true;
      freeandnil(readtxt);
      freeandnil(txtlist);
  end;










{  delstr(htmlText,'<body',[dfdelbefore]);
  biaoti:='biaoti';
  //取作者
  Author:=htmlText;
  delstr(Author,biaoti,[dfdelbefore]);
  delstr(Author,'rc-writer',[dfdelbefore]);
  delstr(Author,'>',[dfdelbefore]);
  delstr(Author,'<',[dfdelafter]);

  showmessage(Author);

  //取引题
  yinti:=htmlText;
  delstr(yinti,biaoti,[dfdelafter]);
  table_Pos:=0;
 //example:   ss:='<table>ccc</table><table>ddd</table>';
   yinti:=delstrByNum(yinti,'<table',1,[dfdelafter],true)+'>';
   RmHtmlTags(yinti);
   showmessage(yinti );

 //取正文内容
 neirong:='neirong';
 zhenwen:=htmlText;
 delstr(zhenwen,neirong,[dfdelbefore]);
 delstr(zhenwen,'<P',[dfdelbefore],false);
 delstr(zhenwen,'<script',[dfdelafter]);
 Memo1.Text:=zhenwen;
 }











end;

procedure TForm1.Button2Click(Sender: TObject);
var
  ss: string;
begin
   ss:=Memo1.Text;
   RmHtmlTagsEx(ss);
   memo1.Text:=ss;
end;

procedure TForm1.FormCreate(Sender: TObject);
begin
edit1.Clear ;
memo2.Clear ;
end;

end.


 

 

 

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请联系我们举报,一经查实,本站将立刻删除。

发布者:全栈程序员-站长,转载请注明出处:https://javaforall.net/154654.html原文链接:https://javaforall.net

(0)
全栈程序员-站长的头像全栈程序员-站长


相关推荐

  • connectionstring

    connectionstring在ASP.NET开发的网站根目录,有一个名为web.config的文件,顾名思义,这是为整个网站进行配置的文件,其格式为XML格式。这里主要谈谈文件中的节。节是对连接到数据库的字符串进行配置,由于M

    2022年6月30日
    21
  • Ubuntu 64位 16.04 Minnet安装及测试,可视化工具调试,抓包工具wireshark安装及测试

    Ubuntu 64位 16.04 Minnet安装及测试,可视化工具调试,抓包工具wireshark安装及测试

    2021年10月6日
    65
  • 散列的基本概念

    散列的基本概念散列的基本概念什么是散列?为什么需要散列?散列是一种思想。与已经学过的其他数据结构相比较,向量是采用循秩访问(callbyrank)的访问方式,列表是采用循位置访问(callbyposition)的访问方式,二叉搜索树是采用循关键码访问(callbykey)的访问方式,散列与他们都不一样,是采用循值访问(callbyvalue)的访问方式。举个例子,你现在身处同济大学嘉定…

    2022年5月15日
    39
  • 学英语网络资源推荐

    学英语网络资源推荐1.推荐标准和目的2.中国十佳英语学习网站推荐3.学英语Web资源推荐4.在线辞典大全5.语法网站列表6.英语新闻网站大全7.英文电影网站大全返回1.推荐标准和目的希望能对了解英语(文化)不多的国内英语初学者(尤其是社会人士)有所帮助。希望打开初学者利用Internet学习英语的…

    2022年5月25日
    35
  • 关闭防火墙 linux_linux系统防火墙关闭

    关闭防火墙 linux_linux系统防火墙关闭抛开实际生产环境个人平时练习的时候安装虚拟机可能遇到过很多坑就很烦,可能很大一部分原因都是防火墙没关掉哈哈哈哈所以建议永久性关闭防火墙下面是CentOs7关闭防火墙的命令!1:查看防火状态systemctlstatusfirewalld如果是这样就开着呢如果是这样就是关着2:暂时关闭防火墙systemctlstopfirewalld3:重启防火墙systemctlenablefirewalld5:永久关闭后重启Linux永久关闭防火墙firewalld和selli

    2022年9月6日
    6
  • java 动态库卸载_java 卸载动态链接库

    java 动态库卸载_java 卸载动态链接库importjava.lang.reflect.Field;importjava.lang.reflect.Method;importjava.util.Iterator;importjava.util.Vector;publicclassFreeDynamicDll{static{//首先确保这些dll文件存在System.load(“c:/test/Decode.dll”);…

    2022年5月12日
    42

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注

关注全栈程序员社区公众号