Discussion:
cat/tac/head/tail
(too old to reply)
root
2018-06-20 19:11:05 UTC
Permalink
Raw Message
The other day I was faced with a problem of fetching and re-ordering
lines in a large number of files. My mind boggled trying to see how
to use cat/tac and head/tail to do the job. For one thing the number
of lines in each file varied.

So I wrote the following utility which, as far as lines in a file
go, does everything the standard utilities do and much more.
Source code follows. I call the program ht (head/tail).

Almost everything I do involves using utilities as filters, but
I added the ability for ht to read from or write to a file.
My implementation of that is, admittedly, clumsy. Here is
an outline of ht functionality:

filter/program to accept input, then write to file if file
name is given as +first argument

Input is taken from stdin unless an argument is given
as ++filename as input file.

Successive arguments define which lines are to be
printed and the order of printing.

lines are numbered 1 to n, the letter n represents
the last line. any sequence of digits represents
a line. the letter n followed by a sequence of
digits means n-that number: n5 means n-5
The hyphen is used to represent ranges of
numbers: 7-11 means lines 7 to 11 inclusive.

Without a leading number, -7 means 1-7;
without a trailing number, 7- means 7-n.

~xxx means not line xxx

Without any arguments the input is simply sent
to the output. With arguments the default is no
lines are output except those specifically named.

argument beginning with ++ means name for input file follows

argument beginning with + means name for output file follows

Examples of use:

cat file|ht n1-2 prints the second to the penultimate line in
reverse order

The same result can be obtained via:

ht ++file n1-2

ht ++file is the same as cat file.
ht ++file n-1 is the same as tac

I will use cat as the input to ht in the following examples.

cat | ht 1 4 2 6 outputs the lines 1 4 2 6 in that order

cat | ht 4-9 22-11 prints those lines in that order

Source code:


/*
filter/program to accept input, then write to file if file
name is given as +first argument

Input is taken from stdin unless an argument is given
as ++filename as input file.

Successive arguments define which lines are to be
printed and the order of printing.

lines are numbered 1 to n, the letter n represents
the last line. any sequence of digits represents
a line. the letter n followed by a sequence of
digits means n-that number: n5 means n-5
The hyphen is used to represent ranges of
numbers: 7-11 means lines 7 to 11 inclusive.

Without a leading number, -7 means 1-7;
without a trailing number, 7- means 7-n.

~xxx means not line xxx

Without any arguments the input is simply sent
to the output. With arguments the default is no
lines are output except those specifically named.

argument beginning with ++ means name for input file follows

argument beginning with + means name for output file follows


*/



#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <termio.h>
#include <sys/wait.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>

#define MEMSIZE 0x100000L   /* initial buffer size in bytes */
#define PCHUNK 0x10000      /* maximum number of bytes requested per read() */
#define MORE 1000000        /* bytes added each time the buffer has to grow */

struct stat statbuf;        /* not referenced by the code shown in this file */
char *infile;               /* not referenced by the code shown in this file */
int INFILE=0;               /* descriptor bigread() reads from; 0 is standard input */


/*
 * Read everything available on descriptor INFILE into one heap buffer.
 *
 * nch     receives the number of bytes read.  The count is passed back
 *         through an int, so inputs of 2 GiB or more are not representable.
 *
 * Returns the malloc'ed buffer (never NULL).  The buffer always holds one
 * extra byte after the data, and that byte is set to '\0', so the result
 * can be scanned as a C string.  The caller owns the buffer.
 *
 * INFILE is closed afterwards unless it is 0 (standard input).
 * Allocation failure or a read error is fatal: a message is written to
 * stderr and the process exits, because returning silently truncated
 * input could make the caller overwrite a file with partial contents.
 */
char *bigread(int *nch)
{
    size_t fsize = 0;
    size_t maxsize = MEMSIZE;
    ssize_t count;
    char *buf;
    char *grown;

    buf = malloc(maxsize);
    if (buf == NULL) {
        fprintf(stderr, "Allocation Error\n");
        exit(9);
    }

    for (;;) {
        /* Keep room for one full chunk plus the terminating NUL. */
        if (fsize + PCHUNK >= maxsize) {
            grown = realloc(buf, maxsize + MORE);
            if (grown == NULL) {
                fprintf(stderr, "Allocation Error\n");
                exit(9);
            }
            buf = grown;
            maxsize += MORE;
        }

        count = read(INFILE, buf + fsize, PCHUNK);
        if (count < 0) {
            fprintf(stderr, "Read Error\n");
            exit(1);
        }
        if (count == 0)
            break;              /* end of input */
        fsize += (size_t)count;
    }

    if (INFILE)
        close(INFILE);

    buf[fsize] = '\0';          /* fsize < maxsize is guaranteed by the growth check */
    *nch = (int)fsize;
    return buf;
}


/*
 * Results of the most recent parse() call:
 *   not   nonzero when the argument began with '~'
 *   low   first line number of the range
 *   high  last line number of the range; high < low means descending order
 *   b4    scratch: offset of the most recently parsed n<digits> term
 */
int not,low,high,b4;
/* Number of lines in the input; this is the value the letter 'n' stands for. */
int numlines;

/* Return the value of the digit run at *pp (0 if there is none) and
 * advance *pp past it. */
static int take_number(const char **pp)
{
    const char *p = *pp;
    int value = 0;

    /* cast: passing a negative plain char to isdigit() is undefined */
    if (isdigit((unsigned char)*p)) {
        value = atoi(p);
        while (isdigit((unsigned char)*p))
            ++p;
    }
    *pp = p;
    return value;
}

/* Line number `back` lines before the last one, never less than 1. */
static int from_end(int back)
{
    int k = numlines - back;

    return (k < 1) ? 1 : k;
}

/* Second endpoint of a range whose first endpoint was a plain number or
 * was omitted: n[digits], digits, or anything else meaning "last line". */
static int range_end(const char *p)
{
    if (*p == 'n') {
        ++p;
        b4 = take_number(&p);
        return from_end(b4);
    }
    if (isdigit((unsigned char)*p))
        return atoi(p);
    return numlines;
}

/*
 * Decode one line-selection argument into the globals not/low/high.
 *
 * Accepted forms (an optional leading '~' sets `not`; a '.' directly
 * before 'n' is skipped):
 *   7        line 7
 *   7-11     lines 7 through 11
 *   7-       line 7 through the last line
 *   7-n2     line 7 through line n-2
 *   7n2      same as 7-n2
 *   n, n5    the last line, line n-5 (clamped to at least 1)
 *   n1-2     line n-1 down to line 2
 *   n-       the last line down to line 1
 *   -4       lines 1 through 4
 *   -, -n2   line 1 through the last line / through line n-2
 * Anything else leaves low == high == 0, which selects nothing.
 *
 * numlines must already hold the line count of the input.
 */
void parse(char *ptr)
{
    const char *p = ptr;

    not = 0;
    low = high = 0;

    if (*p == '~') {
        not = 1;
        ++p;
    }
    if (p[0] == '.' && p[1] == 'n')
        ++p;

    /* Forms that start with a plain line number. */
    if (isdigit((unsigned char)*p)) {
        low = high = take_number(&p);
        switch (*p) {
        case '\0':
            return;
        case '-':
            high = range_end(p + 1);
            return;
        case 'n':
            /* Skip the 'n' before reading its offset; calling atoi() on
             * the 'n' itself always produced 0. */
            ++p;
            b4 = take_number(&p);
            high = from_end(b4);
            return;
        default:
            high = numlines;
            return;
        }
    }

    switch (*p) {
    case 'n':
        ++p;
        b4 = take_number(&p);
        low = high = from_end(b4);
        if (*p != '-')
            return;
        ++p;
        if (*p == 'n') {
            ++p;
            b4 = take_number(&p);
            high = from_end(b4);
        } else if (*p == '\0') {
            high = 1;           /* "n-" counts down to the first line */
        } else {
            high = atoi(p);
        }
        return;
    case '-':
        low = 1;
        while (*p == '-')
            ++p;
        high = range_end(p);
        return;
    default:
        return;
    }
}

FILE *outfd;    /* stream that selected lines are written to */
char **lptrs;   /* lptrs[i] is the text of line i+1, or NULL once that line is excluded */

/*
 * Write line number k (1-based) to outfd, followed by a newline.
 * Does nothing when k lies outside 1..numlines or when the line's
 * entry in lptrs is NULL.
 */
void send(int k)
{
    char *ptr;

    if (k < 1 || k > numlines)
        return;
    ptr = lptrs[k - 1];
    if (ptr)
        fprintf(outfd, "%s\n", ptr);
}


int main(argc,argv)
int argc;
char *argv[];
{
char *lines,*ptr,c;
int j,k,kin,numch,numw,fd;
int numarg,n;

for(j=1,kin=1;j<argc;++j){
ptr=argv[j];
if(*ptr=='+'){
++ptr;
++kin;
if(*ptr=='+'){
++ptr;
INFILE=open(ptr,O_RDONLY); //NOT O_LARGEFILE
if(INFILE<0){
printf("Cannot Read %s\n",ptr);
exit(1);
}
}
}
}





lines=ptr=bigread(&numch);
numlines=0;
while(*ptr){
if(*ptr=='\n') ++numlines;
++ptr;
}
lptrs=(char **)malloc(numlines*sizeof(char *));
lptrs[0]=ptr=lines;
n=0;
while(*ptr){
while(*ptr && *ptr!='\n') ++ptr;
if(*ptr=='\n'){
++n;
*ptr++=0;
lptrs[n]=ptr;
}
}


outfd=stdout;
if(argc==1){
for(k=0;k<numlines;++k) send(k+1);
exit(0);
}


for(j=1;j<argc;++j){
ptr=argv[j];
if(*ptr=='+'&&ptr[1]!='+'){
++ptr;
outfd=fopen(ptr,"w");
if(!outfd){
printf("Cannot Write to %s\n",ptr);
exit(1);
}
}
}



if(argc>1&&argc==kin){
for(k=0;k<numlines;++k) send(k+1);
exit(0);
}


for(j=1;j<argc;++j){
if(*argv[j]=='+') continue;
parse(argv[j]);

if(not){
if(low==high){
lptrs[low-1]=0;
continue;
}
if(low>high){
for(k=low;k>=high;--k) lptrs[k-1]=0;
continue;
}
for(k=low;k<=high;++k) lptrs[k-1]=0;
continue;
}
if(low==high){
send(low);
continue;
}
if(low>high){
for(k=low;k>=high;--k) send(k);
continue;
}
for(k=low;k<=high;++k) send(k);
}

if(outfd!=stdout) fclose(outfd);


exit(0);
}
root
2018-06-20 19:18:17 UTC
Permalink
Raw Message
root <***@home.org> wrote:

I forgot to mention an important feature of ht. I
previously posted source to a program sink that
accumulated all its input before writing output.
ht has that feature which allows the following
usage:

cat file|ht +file <stuff> which does not overwrite file
before ht has a chance to read the contents.

cat file | <someoperations> >file
erases the file so the pipe never has input.

sink has been the terminus in almost every piping operation
I have used since then.

Loading...