rmtags.c

1、-mno-cygwin : cygwin のライブラリーを使わない指定(標準の windows 環境で使用可能な配布フォーマット)
2、-mwindows : Send To で実行する時に、ウインドウを作らないためのトリック (GUIプログラム作成用オプション)
   (注意事項 ウインドウが生成されなくなるので、標準出力、エラー出力が表示されなくなる。)
3、ファイルIOがバイナリーモードなのは、Windows のテキストモードではファイルの途中に 0x1A (Ctrl+Z) があるとそこで EOF が返されてしまう事への対応
4、register 宣言は、変数をメモリー上に確保する必要がないことを示し、処理の高速化のためにCPUのレジスターを変数に割り当ててよいとコンパイラーに伝える宣言(あくまでヒントであり、実際の割り当てはコンパイラーが決める)
5、{....} はスコープ(変数の有効範囲)を形成するので、この範囲内でのみ有効な変数の宣言が可能
 131 } else { register int  c ;
      変数 c はこのブロック内(131〜138行)でのみ有効
 138 }

     1  #include <stdio.h>
     2  #include <stdlib.h>
     3  #include <string.h>
     4  #include <ctype.h>
     5  #include <sys/types.h>
     6  #include <sys/stat.h>
     7  // gcc -mno-cygwin -o rmtags rmtags.c
     8  // gcc -mno-cygwin -mwindows -o rmtags rmtags.c
     9
    10  #define TAG_MAX_SIZE    2048      // size of the buffers that hold one whole tag
    11  #define TOKEN_MAX_SIZE  512       // size of the buffers that hold one tag-name token
    12  #define FAVOR     1               // flag value: enabled
    13  #define NOFAVOR   (!FAVOR)        // flag value: disabled (evaluates to 0)
    14
    15  int     link_print_flag = FAVOR ;   // FAVOR: keep <link> tags that do not contain "http"; otherwise every <link> tag is dropped
    16
    17  FILE          *fperr ;      // stream for error messages; main() points it at stderr
    18  unsigned char *buffer ;     // heap buffer holding the whole input plus a terminating NUL
    19  struct stat   fileInfo;     // fstat() result, used by main() to obtain the input size
    20
    21  unsigned char getToken(unsigned char *buff, unsigned char *token, int n)
    22  {
    23  register int  i, c ;
    24
    25          for( i=0, *token='\0'; (c=*(buff+i)) != '\0' && i<=n; i++ ) {
    26                  if( c != ' ' && c != '>' && c != '\r' && c != '\n' ) {
    27                          *(token+i) = tolower(c) ; *(token+i+1) = '\0' ;
    28                  } else return(c) ;
    29          }
    30  }
    31
    32  int     seekEndToken(unsigned char *buff, unsigned char *token)
    33  { // <script type="text/javascript" src="my-javascript.js"/> & <script ...>...</script>
    34  unsigned char tkn[TOKEN_MAX_SIZE] ;
    35  register int  i, c, flg=FAVOR ;
    36
    37          for( i=1; (c=*(buff+i)) != '\0'; i++ ) {
    38                  if( c == '>' ) flg = NOFAVOR;
    39                  if( flg == FAVOR && c == '/' && *(buff+i+1) == '>' ) {
    40                          return( i + 1 ) ;
    41                  } else if ( c == '<' ) {
    42                          getToken((buff+i), tkn, strlen(token)) ;
    43                          strcat( tkn, ">") ;
    44                          if( strcmp( token, tkn ) == 0 ) {
    45                                  return( i + strlen(token) -1 ) ;
    46                          }
    47                  }
    48          }
    49          return( i ) ;
    50  }
    51
    52  unsigned char *checkStringsInTag(unsigned char *tag, unsigned char *strings)
    53  {
    54  unsigned char lowercase[TAG_MAX_SIZE] ;
    55  register int  i, length = (TAG_MAX_SIZE -1) > strlen(tag) ? strlen(tag) : TAG_MAX_SIZE -1 ;
    56
    57          for( i=0; i<length; i++ ) {
    58                  lowercase[i] = tolower(*(tag +i)) ; lowercase[i+1] = '\0';
    59          }
    60          return( strstr(lowercase, strings) ) ;
    61  }
    62
    63  int     linkTagOperation(unsigned char *buff, int size, int flag, FILE *fpo)
    64  {
    65  unsigned char link[TAG_MAX_SIZE] ;
    66  register int  i, j ;
    67
    68          link[0] = '\0' ;
    69          for(i=j=0; *(buff+i) != '>' && *(buff+i) != '\0' && !(j>0 && *(buff+i)=='<'); i++) {
    70                  if( j < TAG_MAX_SIZE -3 ) {
    71                          link[j] = *(buff+i); link[j+1] = '>'; link[j+2] = '\0';
    72                          j++;
    73                  }
    74          }
    75          if( *(buff+i) == '<' || *(buff+i) == '\0' ) {
    76                  fprintf( fperr, "Bad file format!\n" );
    77                  if( *(buff+i) == '\0' ) {
    78                          exit(1);
    79                  }
    80          } else if( checkStringsInTag(link, "http") == NULL && flag == FAVOR) {
    81                  fprintf( fpo, "%s", link ) ;
    82          }
    83          return( i ) ;
    84  }
    85
    86  execution(unsigned char *buff, int size, FILE *fpo)
    87  {
    88  unsigned char token[TOKEN_MAX_SIZE], link[TAG_MAX_SIZE] ;
    89  register int  i, j ;
    90
    91          for( i=0; i<size; i++ ) {
    92                  if( *(buff+i) == '<' &&  *(buff+i+1) != '/' ) {
    93                          getToken((buff+i), token, sizeof(token)-1) ;
    94                          if( strcmp(token, "<script") == 0 ) {
    95                                  i += seekEndToken( (buff+i), "</script>") ;
    96                          } else if( strcmp(token, "<noscript") == 0 ) {
    97                                  i += seekEndToken( (buff+i), "</noscript>") ;
    98                          } else if( strcmp(token, "<iframe") == 0 ) {
    99                                  i += seekEndToken( (buff+i), "</iframe>") ;
   100  //                      } else if( strcmp(token, "<object") == 0 ) {
   101  //                              i += seekEndToken( (buff+i), "</object>") ;
   102                          } else if( strcmp(token, "<link") == 0 ) {
   103                                  i += linkTagOperation(buff+i, size -i+1, link_print_flag, fpo) ;
   104                          } else  fputc(*(buff+i), fpo) ;
   105                  } else  fputc(*(buff+i), fpo) ;
   106          }
   107  }
   108
   109  read_data(unsigned char *buff, int size, FILE *fpi)
   110  {
   111  register int  i, c ;
   112
   113          for( i=0; i<size ; i++ ) {
   114                  if((c=fgetc(fpi)) == EOF ) {
   115                          fprintf( fperr, "Unexpected EOF was found.\n" );
   116                  } else *(buff+i) = c ;
   117          }
   118          *(buff+size) = '\0' ;
   119  }
   120
   121  main( int argc, char *argv[])
   122  {
   123  FILE    *fpi = stdin, *fpo = stdout ;
   124  register int  lp, data_size = 0 ;
   125
   126          for( lp=0, fperr=stderr; lp<argc; lp++ ) {
   127                  if( argc == 1 ) {
   128                          if( (fpi=tmpfile()) == NULL ) {
   129                                  fprintf( fperr, "Temp file open error!\n" );
   130                                  exit( 1 ) ;
   131                          } else { register int  c ;
   132                                  while( (c=fgetc(stdin)) != EOF ) {
   133                                          if( fputc(c,fpi) == EOF) {
   134                                                  fprintf( fperr, "Temp file write error!\n" );
   135                                                  exit( 1 ) ;
   136                                          }
   137                                  }       fflush(fpi);  rewind(fpi);
   138                          }
   139                  } else { if ( lp == 0 ) lp++ ;
   140                          if ( (fpi=fopen(argv[lp],"rb")) == NULL ) {
   141                                  fprintf( fperr, "File '%s' open error!\n", argv[lp] );
   142                                  exit( 1 ) ;
   143                          }
   144                  }
   145
   146                  if ( fstat(fileno(fpi), &fileInfo) != 0 ) {
   147                          fprintf( fperr, "fstat failed!\n" );
   148                          exit( 1 ) ;
   149                  } else data_size = fileInfo.st_size ;
   150
   151                  if( (buffer=(unsigned char *)malloc(data_size +1)) == NULL) {
   152                          fprintf( fperr, "Could not allocate memory!\n" );
   153                          exit( 1 ) ;
   154                  } else read_data(buffer, data_size, fpi) ;
   155                  fclose(fpi) ;
   156
   157                  if( argc > 1 && (fpo=fopen(argv[lp],"wb")) == NULL ) {
   158                          fprintf( fperr, "File '%s' open error!\n", argv[lp] );
   159                          exit( 1 ) ;
   160                  } else execution(buffer, data_size, fpo);
   161                  fclose(fpo) ;
   162
   163                  free(buffer);
   164          }
   165          return 0 ;
   166  }