N
nn
# $ time head -1000000 myfile >/dev/null
# real 0m4.57s
# user 0m3.81s
# sys 0m0.74s
# $ time ./repnullsalt.py '|' myfile
# 0 1 Null columns:
# 11, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 33, 45, 50, 68
# real 1m28.94s
# user 1m28.11s
# sys 0m0.72s
import sys
def main():
    """Report which columns of a delimited file contain an empty field.

    Command-line arguments (read from ``sys.argv``):
        argv[1]: field delimiter; encoded as latin-1 so it can be matched
            against the raw bytes of the file.
        argv[2]: path of the file to scan (opened in binary mode).
        argv[3]: optional limit, in millions of rows, after which the scan
            stops (default 1).

    A progress counter is written to stdout at the start of every million
    rows. It is followed by the 1-based numbers of all columns in which at
    least one scanned row held an empty field.

    Raises:
        ValueError: if argv[3] is not an integer.
        OSError: if the file cannot be opened.
    """
    # Convert once: comparing an int against the raw argv string raises
    # TypeError on Python 3.
    limit = int(sys.argv[3]) if len(sys.argv) > 3 else 1
    dlm = sys.argv[1].encode('latin1')
    with open(sys.argv[2], 'rb') as inf:
        first = next(inf, None)
        if first is None:
            # Empty file: there are no columns to report.
            nulls = []
        else:
            # Strip only the line terminator, so a final line without a
            # trailing newline keeps its last byte and CRLF files do not
            # leave a stray '\r' in the last field.
            nulls = [not x for x in first.rstrip(b'\r\n').split(dlm)]
        # j counts the rows after the first one, which seeded `nulls`.
        for j, r in enumerate(inf):
            if j % 1000000 == 0:
                print(j // 1000000, end=' ', flush=True)
                if j // 1000000 >= limit:
                    break
            fields = r.rstrip(b'\r\n').split(dlm)
            if len(fields) > len(nulls):
                # A row wider than any seen so far: grow the flag list
                # rather than failing with IndexError.
                nulls.extend([False] * (len(fields) - len(nulls)))
            for i, cur in enumerate(fields):
                if not cur:
                    nulls[i] = True
    print('Null columns:')
    print(', '.join(str(i + 1) for i, val in enumerate(nulls) if val))
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module has no
    # side effects. main() reads sys.argv[1] and sys.argv[2] unconditionally,
    # so both must be present; sys.argv[3] is optional.
    if len(sys.argv) <= 2:
        # sys.exit() with a string prints it to stderr and exits with status 1.
        sys.exit("Usage: " + sys.argv[0] +
                 " <delimiter> <filename> <limit>")
    main()
# real 0m4.57s
# user 0m3.81s
# sys 0m0.74s
# $ time ./repnullsalt.py '|' myfile
# 0 1 Null columns:
# 11, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 33, 45, 50, 68
# real 1m28.94s
# user 1m28.11s
# sys 0m0.72s
import sys
def main():
    """Report which columns of a delimited file contain an empty field.

    Command-line arguments (read from ``sys.argv``):
        argv[1]: field delimiter; encoded as latin-1 so it can be matched
            against the raw bytes of the file.
        argv[2]: path of the file to scan (opened in binary mode).
        argv[3]: optional limit, in millions of rows, after which the scan
            stops (default 1).

    A progress counter is written to stdout at the start of every million
    rows. It is followed by the 1-based numbers of all columns in which at
    least one scanned row held an empty field.

    Raises:
        ValueError: if argv[3] is not an integer.
        OSError: if the file cannot be opened.
    """
    # Convert once: comparing an int against the raw argv string raises
    # TypeError on Python 3.
    limit = int(sys.argv[3]) if len(sys.argv) > 3 else 1
    dlm = sys.argv[1].encode('latin1')
    with open(sys.argv[2], 'rb') as inf:
        first = next(inf, None)
        if first is None:
            # Empty file: there are no columns to report.
            nulls = []
        else:
            # Strip only the line terminator, so a final line without a
            # trailing newline keeps its last byte and CRLF files do not
            # leave a stray '\r' in the last field.
            nulls = [not x for x in first.rstrip(b'\r\n').split(dlm)]
        # j counts the rows after the first one, which seeded `nulls`.
        for j, r in enumerate(inf):
            if j % 1000000 == 0:
                print(j // 1000000, end=' ', flush=True)
                if j // 1000000 >= limit:
                    break
            fields = r.rstrip(b'\r\n').split(dlm)
            if len(fields) > len(nulls):
                # A row wider than any seen so far: grow the flag list
                # rather than failing with IndexError.
                nulls.extend([False] * (len(fields) - len(nulls)))
            for i, cur in enumerate(fields):
                if not cur:
                    nulls[i] = True
    print('Null columns:')
    print(', '.join(str(i + 1) for i, val in enumerate(nulls) if val))
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module has no
    # side effects. main() reads sys.argv[1] and sys.argv[2] unconditionally,
    # so both must be present; sys.argv[3] is optional.
    if len(sys.argv) <= 2:
        # sys.exit() with a string prints it to stderr and exits with status 1.
        sys.exit("Usage: " + sys.argv[0] +
                 " <delimiter> <filename> <limit>")
    main()