/*
 * Global buffers, parser state and tag ids of the feed-to-HTML converter.
 *
 * NOTE(review): the header names of the three #include directives are missing
 * in this copy of the file (text of the form <...> appears to have been
 * stripped). The file uses FILE/getc/printf and strlen/strcpy/strstr, so
 * <stdio.h> and <string.h> are presumably two of them -- confirm against the
 * original source.
 */
#include
#include
#include

/* Sizes (in bytes) of the global text buffers declared below. */
#define TITLEMAX 350
#define LINKTITLEMAX 350
#define AUTHORMAX 350
#define LINKMAX 500
#define DESCRIPTIONMAX 2500 // 250 Textbuffer
#define CONTENTMAX 2500 // 250
#define PUBDATEMAX 350
#define NEWSSERVICEMAX 350

/* Text collected from the feed; filled by LoadTxt()/LoadVar() and printed by the Out...() functions. */
char TITLE[TITLEMAX];
char LINKTITLE[LINKTITLEMAX];
char AUTHOR[AUTHORMAX];
char LINK[LINKMAX];
char DESCRIPTION[DESCRIPTIONMAX];
char CONTENT[CONTENTMAX];
char PUBDATE[PUBDATEMAX];
char NEWSSERVICE[NEWSSERVICEMAX]; // fallback author: copy of the channel/feed title
char NEWSAUTHOR[NEWSSERVICEMAX];  // fallback author: feed-level author

#define TagBufferlength 1001
int Tag;                          // id of the tag returned by the last LoadTagBuffer() call
char TagBuffer[TagBufferlength];  // raw text of the current tag (searched by LoadVar())
int TagBufferLoad; // 1 = tag loaded completely, 0 = not fully loaded
int TagEndSlash; // set to 1 when a tag was closed with "/>" (original note: ...> = 0, .../> = 1)
int Mode=0; // channel, entry
int SubMode=0; // author, content, etc
int FileEnd=0; // set to 1 once getc() on the input returned EOF
int XML=0; // rss, feed, rdf

#define varlength 30
char charset[varlength];     // value of encoding="..." from the ?xml declaration
char xmlversion[varlength];  // value of version="..." from the ?xml declaration
char rssversion[varlength];  // value of version="..." from the rss/feed root tag

/* NOTE(review): several comments in this group lost their text (stripped markup). */
#define TXT 1 //
#define HTML 2 //
char type[varlength]; // TEXT, HTML in
int TYPE=TXT; // TXT or HTML; LoadTxt() stops at the first '<' when this is TXT
int BACKTYPE; // tag id at which LoadTxt() stops when TYPE is HTML
int READHTML=0; // HTML||XHTML = 1 , TXT=0
int CDATA=0; // Flag for: set to 1 by LoadTxt() after it has read "<![CDATA["
int Tempfile=0; // flag for Tempfile: 1 while cache\tempfeed.txt holds a long description
int Hlineflag=0; // set by OutItemrss(), tested by OutEnd()

/*
 * Tag ids. The values 1..24 are the positions of the tag names in rsstag[];
 * the slash_ variants (id + 100) stand for the matching closing tags.
 */
#define xml 1
// rss tags
#define rss 2
#define channel 3
#define title 4
#define link 5
#define description 6
#define item 7
#define pubDate 8
#define author 9
#define image 10
// feed tags
#define feed 11
#define entry 12
#define name 13
#define id 14
#define summary 15
#define updated 16
#define content 17
#define email 18
#define subtitle 19
// rdf tags
#define rdf 20
#define source 21
#define date 22
#define publisher 23
#define creator 24
// rss tags
#define slash_rss 102
#define slash_channel 103
#define slash_title 104
#define slash_link 105
#define slash_description 106
#define slash_item 107
#define slash_pubDate 108
#define slash_author 109
#define slash_image 110
// feed tags
#define slash_feed 111
#define slash_entry 112
#define slash_name 113
#define slash_id 114
#define slash_summary 115
#define slash_updated 116
#define slash_content 117
#define slash_email 118
#define slash_subtitle 119 // rdf tags #define slash_rdf 120 #define slash_source 121 #define slash_date 122 #define slash_publisher 123 #define slash_creator 124 #define TagLength 15 #define TagMax 25 typedef char rssstr[TagLength]; rssstr rsstag[TagMax]={"","?xml","rss","channel","title","link","description","item","pubDate","author","image","feed","entry","name","id","summary","updated","content","email","subtitle","rdf:RDF","dc:source","dc:date","dc:publisher","dc:creator"}; FILE *fopen(),*in,*out,*tmp; char ArachnePath[30]; int filenumber=0; char character; int loadchar=1; // Some prototypes void SaveContentToFile(int Descriptionback); // save content to file void CutString(char *p,int length); // Cut string for HTML-output void ClearParameter(void); // int LoadTagBuffer(void); int LoadVar(char* Var,char *str,int vlength); int JumpToSlash(int SearchTag); int JumpTagEnd(void); void OutEnd(void); void OutHead(void); void OutItemrss(void); void LoadHtmlTag(char *p,char *buffer); // int SweepHtml(char *p); int LoadTxt(char *text,int strlenmax,int i); void SaveContentToFile(int back) { char filename[90],*p; filenumber++; if(filenumber>99)return; sprintf(filename,"%scache\\feed%d.htm",ArachnePath,filenumber); if(strlen(AUTHOR)<2)strcpy(AUTHOR,NEWSAUTHOR); if(strlen(AUTHOR)<2)strcpy(AUTHOR,NEWSSERVICE); CutString(AUTHOR,400); out=fopen(filename,"w"); if(out) {fprintf(out,"%s \n",TITLE); if(charset[0])fprintf(out,"\n",34,34,34,charset,34); fprintf(out,"\n"); fprintf(out,"\n"); fprintf(out,"
%s\n",AUTHOR); fprintf(out,"Arachne "); fprintf(out,"Feedreader\n

\n"); fprintf(out,"%s",CONTENT); // ------------ while(back==2) {back=LoadTxt(CONTENT,CONTENTMAX,2); fprintf(out,"%s",CONTENT); } // ------------ fprintf(out,"\n
Generated by rss2htm.exe"); fclose(out); sprintf(CONTENT,"Content: local file",34,filename,34); } return; } void SaveToTempFile(int back) { char filename[90]; sprintf(filename,"%scache\\tempfeed.txt",ArachnePath); tmp=fopen(filename,"w"); if(tmp)fprintf(tmp,"%s",DESCRIPTION); {while(back==2) {back=LoadTxt(DESCRIPTION,DESCRIPTIONMAX,2); // SweepHtml(DESCRIPTION); if(tmp)fprintf(tmp,"%s",DESCRIPTION); } } if(tmp)Tempfile=1; if(tmp)fclose(tmp); return; } void LoadTempFile() { char filename[90]; char character1; sprintf(filename,"%scache\\tempfeed.txt",ArachnePath); tmp=fopen(filename,"r"); if(tmp) { while((character1=getc(tmp))!=EOF) printf("%c",character1); fclose(tmp); Tempfile=0; } return; } void CutString(char *p,int length) // Cut string for HTML-output { char letter; int pixel=0; length=length-35; while((letter=*p)!=0) {if((letter>'?')&&(letter<']'))pixel=pixel+9; // upper letter else pixel=pixel+6; // lower letter if(letter=='W')pixel=pixel+4; if(letter=='M')pixel=pixel+3; if(letter=='V')pixel=pixel+3; if(letter=='I')pixel=pixel-4; if(letter=='J')pixel=pixel-4; if(letter=='L')pixel=pixel-4; if(letter=='T')pixel=pixel-4; if(letter=='R')pixel=pixel-3; if(letter=='w')pixel=pixel+3; if(letter=='m')pixel=pixel+4; if(letter=='v')pixel=pixel+3; if(letter=='i')pixel=pixel-3; if(letter=='j')pixel=pixel-3; if(letter=='l')pixel=pixel-3; if(letter=='t')pixel=pixel-3; if(letter=='r')pixel=pixel-3; if(letter=='v')pixel=pixel-3; if(pixel>length){*(p-2)='.';*(p-1)='.';*p=0;return;} p++; } return; } void ClearParameter() {AUTHOR[0]=0;LINKTITLE[0]=0;LINK[0]=0;DESCRIPTION[0]=0; CONTENT[0]=0;PUBDATE[0]=0;} int LoadVar(char* Var,char *str,int vlength) { char *p,*pv; int counter=1; pv=Var; *pv=0; p=strstr(TagBuffer,str); if(!p)return 0; while(*p!=0){if(*p=='"')break;p++;} // search first " p++; while(*p!=0) {if(*p=='>'){*Var=0;return 0;} if(*p=='"') {*pv=0;return 1;} *pv=*p;p++;pv++; counter++; if(counter==vlength){*Var=0;return 0;} } *Var=0; return 0; } int JumpToSlash(int 
SearchTag) { SearchTag=SearchTag+100; weiter: character=getc(in); if(character==EOF){FileEnd=1;return 0;} if(character!='<')goto weiter; character=getc(in); if(character==EOF){FileEnd=1;return 0;} Tag=LoadTagBuffer(); if(FileEnd)return 0; if(!TagBufferLoad)JumpTagEnd(); if(Tag==SearchTag)return 1; goto weiter; } int JumpTagEnd() { int counter=1; // counts < up and > down char character2; // for endslashdetection ... /> character2=character; while(character!=EOF) {if(character=='>')counter--; if(character=='<')counter++; if(counter==0){if(character2=='/')TagEndSlash=1;return 1;} character2=character; character=getc(in); } FileEnd=1; return 0; } void OutEnd() { if((XML==rss)||(XML==feed)||(XML==rdf)) {if(!Hlineflag)printf("
\n"); printf("\n"); printf("
Generated by rss2htm.exe\n"); printf("XMLbase: "); if(XML==rss) printf("rss"); if(XML==feed)printf("atom"); if(XML==rdf) printf("rdf"); printf("\n"); if(xmlversion[0]) printf("xmlsversion="%s"\n",xmlversion); if(rssversion[0]) printf("rssversion="%s"\n",rssversion); printf("
"); } else {printf("Arachne Feedreader\n"); printf("Arachne Feedreader:"); printf(" Cannot convert feed to html !
\n"); printf("Generated by rss2htm.exe\n"); } printf("\n"); } void OutHead() { printf("%s\n",TITLE); printf("\n"); if(charset[0]) printf("\n",34,34,34,charset,34); printf("\n"); printf("\n"); printf("
WEBSITE:  \n"); CutString(LINKTITLE,300); printf("%s\n",34,LINK,34,LINKTITLE); printf("Arachne \n"); printf("Feedreader
\n",34,34); printf("
\n"); } void OutItemrss() { int linklength=440; printf("\n"); if(strlen(AUTHOR)<2)strcpy(AUTHOR,NEWSAUTHOR); if(strlen(AUTHOR)<2)strcpy(AUTHOR,NEWSSERVICE); CutString(AUTHOR,180); // 17 printf("
  • %s\n",AUTHOR); if(PUBDATE[0]) linklength=230; CutString(LINKTITLE,linklength); // 240 printf("
  • %s
    \n",linklength,34,LINK,34,LINKTITLE); if(PUBDATE[0]) printf("
    %s\n",PUBDATE); printf("
    \n"); if(CONTENT[0])printf("%s
    \n",CONTENT); if(Tempfile){printf("\n"); LoadTempFile();printf("
    \n"); Hlineflag=1; return; } if(DESCRIPTION[0]){printf("\n"); printf("%s
    \n",DESCRIPTION);Hlineflag=1; } else Hlineflag=0; } void LoadHtmlTag(char *p,char *buffer) { int i=0; while(*p!='0') {buffer[i]=*p; if(i==5)break; if(*p==' ')break; p++; i++; } buffer[i+1]=0; return ; } /* int SweepHtml(char *p) { char character,*p1,*p2,*p3; // don't delete ",4)==0) goto weiter2; if(strncmp(buffer,"",6)==0) goto weiter2; if(strncmp(buffer,"",4)==0) goto weiter2; if(strncmp(buffer,"",6)==0) goto weiter2; while((*p1!=0)&&(*p1!='>')) p1++; // jump htmltag <.....> if(*p1==0){*p=0;return;} // delete DESCRIPTION p1++; goto weiter; weiter2: *p2=character; p2++;p1++; goto weiter; } */ int LoadTxt(char *text,int strlenmax,int load) { char c1,c2,c3,c4,c5; char *px; char *p=text; int xnum; int backflag=1; // 1=all text loaded 2=not all text loaded int counter=0; // counter++ = < counter-- => if(strlenmax<10)goto ende; strlenmax=strlenmax-10; // 500 bytes space for last <....> if(load==2)goto weiter; // don't analyze [CDATA *p=0; // clear DESCRIPTION if load=0 while(strlenmax) {character=getc(in); if(character==EOF)goto endfile; if(character=='<') { c1=getc(in); if(c1==EOF)goto endfile; if(c1=='/') {character=c1;loadchar=0;return 0;} if(c1=='!') {if((character=getc(in))!='[')goto skip; if((character=getc(in))!='C')goto skip; if((character=getc(in))!='D')goto skip; if((character=getc(in))!='A')goto skip; if((character=getc(in))!='T')goto skip; if((character=getc(in))!='A')goto skip; if((character=getc(in))!='[')goto skip; CDATA=1; // load <[CDATA[.......]]> goto weiter; } *p=character;p++; *p=c1;p++; goto weiter; } if(character>=' ')break; }goto weiter0; weiter: character=getc(in); weiter0: if(character==EOF)goto endfile; if((character=='<')&&(!CDATA)) {if(TYPE==TXT){*p=0;goto ende;} character=getc(in); if(character==EOF)goto endfile; xnum=LoadTagBuffer(); if(BACKTYPE==xnum) {*p=0;goto ende;} px=TagBuffer; *p='<';p++;counter++; if(xnum==100){*p='/';p++;} while(*px!=0){*p=*px;p++;px++;} character='>';goto weiter2; } if((character=='>')&&(CDATA)) 
{if((cd1==']')&&(cd2==']')){*(p-2)=0;goto ende;}} cd2=cd1;cd1=character; // CDATA flags if(character=='&') // processing " < > & etc. {c1=getc(in);if(c1==EOF)goto endfile; c2=getc(in);if(c2==EOF)goto endfile; c3=getc(in);if(c3==EOF)goto endfile; if((c1=='l')&&(c2=='t')&&(c3==';')) {character='<';goto weiter2;} if((c1=='g')&&(c2=='t')&&(c3==';')) {character='>';goto weiter2;} c4=getc(in);if(c4==EOF)goto endfile; if((c1=='a')&&(c2=='m')&&(c3=='p')&&(c4==';')) {character='&';goto weiter2;} if((c1=='#')&&(c4==';')) { if((c2>='1')&&(c2<='9')&&(c3>='0')&&(c3<='9')) {character=(c2-48)*10+c3-48; goto weiter2; } } c5=getc(in);if(c5==EOF)goto endfile; if((c1=='#')&&(c2=='0')&&(c3=='3')&&(c4=='4')&&(c5=';')) {character=34;goto weiter2;} if((c1=='a')&&(c2=='p')&&(c3=='o')&&(c4=='s')&&(c5=';')) {character=39;goto weiter2;} if((c1=='q')&&(c2=='u')&&(c3=='o')&&(c4=='t')&&(c5=';')) {character='"';goto weiter2;} *p=character;p++; *p=c1;p++; *p=c2;p++; *p=c3;p++; *p=c4;p++; character=c5; strlenmax=strlenmax-5; } weiter2: *p=character; p++; if(character=='<')counter++; // indicator for if(character=='>')counter--; // strlenmax--; if(strlenmax>150)goto weiter; // for SweepHtml() only if(strlenmax>7)if(counter)goto weiter; // don't break htmltags ! backflag=2;goto ende; // not all text loaded ! 
ende: *p=0; return backflag; endfile: FileEnd=1; return 0; skip: JumpTagEnd(); return 0; } int LoadTagBuffer() { char str[100]; char character2=' '; // for .../> detection int flag=0; // ') {if(character2=='/')TagEndSlash=1;goto ende;} if(character==10)goto weiter2; if(character==13)goto weiter2; TagBuffer[i]=character; i++; if(i>=TagBufferlength){TagBufferLoad=0;JumpTagEnd();goto ende;} character2=character; weiter2: character=getc(in); goto weiter; ende: TagBuffer[i]=0; i=1; while(i mechanism TYPE=TXT; // default Tag=LoadTagBuffer(); if(FileEnd)goto fileend; if(Tag==xml) {if(!charset[0]) LoadVar(charset,"encoding=",varlength); if(!xmlversion[0])LoadVar(xmlversion,"version=",varlength); goto weiter; } if(Tag==rss) {XML=rss; LoadVar(rssversion,"version=",varlength); goto weiter; } if(Tag==feed) {XML=feed; LoadVar(rssversion,"version=",varlength); goto weiter; } if(Tag==rdf) {XML=rdf; goto weiter; } if(XML==rss) goto RSSProcessing; if(XML==feed)goto ATOMProcessing; if(XML==rdf) goto RDFProcessing; goto weiter; // ------------------------------------------------------------ // // // // // --------------- begin RSS -processing -------------------- // ------------------------------------------------------------ RSSProcessing: switch (Tag) { case slash_rss: goto ende; // end of RSS-processing case channel: Mode=channel; break; case slash_channel: break; case title: BACKTYPE=slash_title; LoadTxt(LINKTITLE,LINKTITLEMAX,0); if(FileEnd)goto fileend; if(Mode==channel)strcpy(NEWSSERVICE,LINKTITLE); break; case slash_title: break; case link: BACKTYPE=slash_link; LoadTxt(LINK,LINKMAX,0); if(FileEnd)goto fileend; break; case slash_link: break; case description: BACKTYPE=slash_description; Back=LoadTxt(DESCRIPTION,DESCRIPTIONMAX,0); if(FileEnd)goto fileend; // SweepHtml(DESCRIPTION); if(Back==2)SaveToTempFile(Back); break; case slash_description: break; case item: if(Mode==channel) // HEADEROUT {strcpy(TITLE,LINKTITLE); if(strlen(DESCRIPTION)>3) 
{if(strlen(DESCRIPTION)>50)DESCRIPTION[49]='\0'; strcpy(TITLE,DESCRIPTION); } OutHead(); } Mode=item; ClearParameter(); break; case slash_item : OutItemrss(); break; case pubDate: BACKTYPE=slash_pubDate; LoadTxt(PUBDATE,PUBDATEMAX,0); if(FileEnd)goto fileend; break; case slash_pubDate: break; case author: if(TagEndSlash)break; // detect BACKTYPE=slash_author; LoadTxt(AUTHOR,AUTHORMAX,0); if(FileEnd)goto fileend; break; case slash_author: break; case creator: if(TagEndSlash)break; // detect BACKTYPE=slash_creator; LoadTxt(AUTHOR,AUTHORMAX,0); if(FileEnd)goto fileend; break; case slash_creator: break; case image: if(TagEndSlash)break; JumpToSlash(Tag); break; case slash_image : break; case 99: break; // default: break; // Unknown tags } if(FileEnd)goto fileend; goto weiter; // ----------------------------------------------------------- // -------------------- End RSS-processing ------------------- // // // // // -------------------- Start ATOM-processing ---------------- // ----------------------------------------------------------- ATOMProcessing: switch (Tag) { case slash_feed: goto ende; // end of feed-processing case title: BACKTYPE=slash_title; LoadTxt(LINKTITLE,LINKTITLEMAX,0); if(FileEnd)goto fileend; if(Mode==channel)strcpy(NEWSSERVICE,LINKTITLE); break; case slash_title: break; case subtitle: BACKTYPE=slash_subtitle; LoadTxt(TITLE,TITLEMAX,0); if(FileEnd)goto fileend; break; case slash_subtitle: break; case link: LoadVar(LINK,"href=",LINKMAX); if(FileEnd)goto fileend; if(LINK[0]==0) {BACKTYPE=slash_link; LoadTxt(LINK,LINKMAX,0); if(FileEnd)goto fileend; } break; case slash_link: break; case summary: LoadVar(type,"type=",varlength); if(strcmp(type,"html")==0) TYPE=HTML; if(strcmp(type,"xhtml")==0)TYPE=HTML; BACKTYPE=slash_summary; Back=LoadTxt(DESCRIPTION,DESCRIPTIONMAX,0); if(FileEnd)goto fileend; // SweepHtml(DESCRIPTION); if(Back==2)SaveToTempFile(Back); break; case slash_summary: break; case content: LoadVar(type,"type=",varlength); 
if(strcmp(type,"html")==0) TYPE=HTML; if(strcmp(type,"xhtml")==0)TYPE=HTML; BACKTYPE=slash_content; Back=LoadTxt(CONTENT,CONTENTMAX,0); if(FileEnd)goto fileend; if(CONTENT[0])SaveContentToFile(Back); break; case slash_content: break; case entry: if(Mode==channel) {strcpy(NEWSAUTHOR,AUTHOR); if(!TITLE[0])strcpy(TITLE,LINKTITLE); // TITLEMAX= LINKTITLEMAX OutHead(); } Mode=entry; ClearParameter(); break; case slash_entry : OutItemrss(); // OutItemfeed(); break; case updated: BACKTYPE=slash_updated; LoadTxt(PUBDATE,PUBDATEMAX,0); if(FileEnd)goto fileend; break; case slash_updated: break; case author: if(TagEndSlash)break; // detect BACKTYPE=slash_author; LoadTxt(AUTHOR,AUTHORMAX,0); if(FileEnd)goto fileend; SubMode=author; break; case slash_author : SubMode=0; break; case name: if(TagEndSlash)break; // detect if(SubMode==author) {BACKTYPE=slash_name; LoadTxt(AUTHOR,AUTHORMAX,0); if(FileEnd)goto fileend; } break; case slash_name : SubMode=0; break; case 99: break; // default: break; // Unknown tags } if(FileEnd)goto fileend; goto weiter; // ----------------------------------------------------------- // -------------------- End FEED-processing ------------------ // // // // // --------------- begin RDF-processing --------------------- // ------------------------------------------------------------ RDFProcessing: switch (Tag) { case slash_rdf: goto ende; // end of RSS-processing case channel: Mode=channel; break; case slash_channel: if(!LINKTITLE[0])strcpy(LINKTITLE,TITLE); // MAXLINK=TITELMAX OutHead(); Mode=item; break; case title: BACKTYPE=slash_title; if(Mode==channel)LoadTxt(TITLE,TITLEMAX,0); else LoadTxt(LINKTITLE,LINKTITLEMAX,0); if(FileEnd)goto fileend; if(Mode==channel)strcpy(NEWSSERVICE,TITLE); break; case slash_title: break; case link: BACKTYPE=slash_link; LoadTxt(LINK,LINKMAX,0); if(FileEnd)goto fileend; break; case slash_link: break; case description: BACKTYPE=slash_description; Back=LoadTxt(DESCRIPTION,DESCRIPTIONMAX,0); if(FileEnd)goto fileend; // 
SweepHtml(DESCRIPTION); if(Back==2)SaveToTempFile(Back); break; case slash_description: break; case item: if(Mode==channel){JumpToSlash(Tag);break;} ClearParameter(); break; case slash_item: OutItemrss(); break; case date: BACKTYPE=slash_date; LoadTxt(PUBDATE,PUBDATEMAX,0); if(FileEnd)goto fileend; break; case slash_date: break; case source: if(TagEndSlash)break; // detect BACKTYPE=slash_source; if(Mode==channel)LoadTxt(LINKTITLE,LINKTITLEMAX,0); else {if(!AUTHOR[0])LoadTxt(AUTHOR,AUTHORMAX,0);} if(FileEnd)goto fileend; break; case slash_source: break; case publisher: if(TagEndSlash)break; // detect BACKTYPE=slash_publisher; if(Mode==channel) {if(!LINKTITLE[0])LoadTxt(LINKTITLE,LINKTITLEMAX,0);} else LoadTxt(AUTHOR,AUTHORMAX,0); if(FileEnd)goto fileend; break; case slash_publisher: break; case creator: if(TagEndSlash)break; // detect BACKTYPE=slash_creator; if(Mode==channel) {if(!LINKTITLE[0])LoadTxt(LINKTITLE,LINKTITLEMAX,0);} else LoadTxt(AUTHOR,AUTHORMAX,0); if(FileEnd)goto fileend; break; case slash_creator: break; case image:if(TagEndSlash)break; // was JumpToSlash(Tag); // was break; case slash_image : break; case 99: break; // default: break; // Unknown tags } if(FileEnd)goto fileend; goto weiter; // ----------------------------------------------------------- // -------------------- End RDF-processing ------------------- // // // // // // ------------------ End of file ---------------------------- fileend: ende: fclose(in); OutEnd(); return; }