Collate test.
// collate test
// by c.p.brown 2026
//
// TODO:
// - improve packseq() to distribute indices more evenly
// - test all opcollate options
// - fix op.argreport padding
// - stresstest
//
// abstractions are for compatibility with similar programs
//
// current structure (highly simplified)
//
// work [ do stuff to subsets of ops[n].dat via ops[n].writetodat[] ]
// op [
// src []
// arg []
// dat []
// writetodat [ ]
// op [ init ]
// eval [
// threadpool.add [ work ]
// threadpool.add [ work ]
// threadpool.add [ work ]
// ]
// ]
// ops []
// main [
// ops += new op[]
// ops[0].argnput[]
// ops[0].eval[]
// ops += new op[]
// ops[1].setsrc[0]
// ops[1].argnput[]
// ops[1].eval[]
// ...
// ]
GLib.Mutex mutex;
int nthr;
struct packetu {
uint[] items;
}
struct packet {
int[] items;
}
string printusecs (int64 u) {
string us = "%.2f h".printf((((double) u) / 3600000000.0));
if (u < 1000) { return "%lld μs".printf(u); }
if (u < 1000000) { return "%.2f ms".printf((((double) u) / 1000.0)); }
if (u < 60000000) {
return "%.2f s".printf((((double) u) / 1000000.0));
}
if (u < 3600000000) {
return "%.2f m".printf((((double) u) / 60000000.0));
}
return us;
}
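// quick illustrative examples (values follow directly from the thresholds above):
// printusecs(500) -> "500 μs", printusecs(1500) -> "1.50 ms", printusecs(90000000) -> "1.50 m"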
string printintarray (int[] s, int f) {
string[] ii = {"", "\"", "{", "[", "[", "| ", "", ""};
string[] dd = {";", ",", ",", " ", ",", " | ", " ", "\n"};
string[] oo = {"", "\"", "}", "]", "]", " |", "", ""};
string o = ii[f];
for (int i = 0; i < (s.length - 1); i++) {
o = (o + "%d".printf(s[i]) + dd[f]);
}
o = (o + "%d".printf(s[s.length - 1]) + oo[f]);
return o;
}
string printstringarray (string[] s, int f) {
// 0 1 2 3 4 5 6 7 8
string[] ii = {"", "\"", "{", "[", "[", "| ", "", "", ""};
string[] dd = {";", ",", ",", " ", ",", " | ", " ", "\n", ",\n"};
string[] oo = {"", "\"", "}", "]", "]", " |", "", "", ""};
string o = ii[f];
for (int i = 0; i < (s.length - 1); i++) {
o = (o + "%s".printf(s[i]) + dd[f]);
}
o = (o + "%s".printf(s[s.length - 1]) + oo[f]);
return o;
}
int spick (string[] h, string n, bool nc) {
// imitation of pick from Rebol
// nc is ignore case
// return first index of string in string[]
// use for small (<1000 item) string[] arrays like column headers;
// a linear scan is slow, but should beat building a hash-indexed uint[] at that scale
if (nc) {
for (int i = 0; i < h.length; i++) {
if (strcmp(h[i].up(),n.up()) == 0) { return i; }
}
} else {
for (int i = 0; i < h.length; i++) {
if (strcmp(h[i],n) == 0) { return i; }
}
}
return -1;
}
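// e.g. spick({"AA","BB","CC"},"bb",true) returns 1; spick({"AA"},"ZZ",false) returns -1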
int ipick (int[] h, int n) {
// imitation of pick from Rebol
// return first index of int in int[]
// use for arrays of less than 100M items
for (int i = 0; i < h.length; i++) {
if (h[i] == n) { return i; }
}
return -1;
}
bool isanumber (string s) {
for (int x = 0; x < s.char_count(); x++) {
int i = s.index_of_nth_char(x);
unichar u = s.get_char(i);
if (u.isdigit() == false) { return false; }
}
return true;
}
int[] rangeseq (int p, int s) {
// p packet count, s sequence count
// returns rangefrom,rangeto pairs in an unrolled 1d array
int[] o = {};
int tasksperthread = s / p;
int dreg = 0;
int lthr = p;
double npt = ((double) s) / ((double) tasksperthread);
if ((tasksperthread * p) < s) {
double mxp = ((double) p);
while (npt > mxp) {
tasksperthread += 1;
npt = ((double) s) / ((double) tasksperthread);
}
dreg = s - (tasksperthread * ((int) npt));
if (dreg > 0) { lthr = p - 1; }
}
int np = (int) npt;
for (int i = 0; i < np; i++) {
o += (i * tasksperthread);
o += ((i * tasksperthread) + (tasksperthread - 1));
}
if (dreg > 0) {
o += (np * tasksperthread);
o += ((np * tasksperthread) + (dreg - 1));
}
return o;
}
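// worked example for rangeseq (illustrative only, not called by main): 10 items split
// over 4 packets settle on 3 tasks per packet, so the unrolled from/to pairs are
// {0,2,3,5,6,8,9,9}; the remainder row ends up alone in the final pair
void rangeseqdemo () {
print("rangeseq(4,10) is %s\n",printintarray(rangeseq(4,10),2));
}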
public class colcharcounter {
private int packetnum;
private int opindex;
private int[] rows;
private int[] cols;
private int rowcountoffset;
private int csz;
public colcharcounter (int k, int o, int[] rws, int[] cls, int ofs) {
packetnum = k;
opindex = o;
rows = rws;
cols = cls;
csz = cls.length;
rowcountoffset = ofs;
}
public void run (ref int[] celcharcounts, ref int packetscomplete) {
// celcharcounts may use different dimensions from ops[].dat due to masking,
// so rx and cx are used instead of dat row/col indices
int rx = rowcountoffset;
foreach (int r in rows) {
int cx = 0;
foreach (int c in cols) {
celcharcounts[((rx*csz)+cx)] = ops[opindex].datcelcharcountatrc(r,c,false);
cx += 1;
}
rx += 1;
}
mutex.lock();
packetscomplete += 1;
mutex.unlock();
}
}
public class celcharcounter {
private int packetnum;
private int opindex;
private int celstart;
private int celend;
public celcharcounter (int k, int o, int cs, int ce) {
packetnum = k;
opindex = o;
celstart = cs;
celend = ce;
}
public void run (ref int packetscomplete) {
for (int i = celstart; i <= celend; i++) {
ops[opindex].datcelcharcount(i,true);
}
mutex.lock();
packetscomplete += 1;
mutex.unlock();
}
}
class rgb : Object {
public float rr;
public float gg;
public float bb;
public void setrgb (double r, double g, double b) {
rr = (float) r;
gg = (float) g;
bb = (float) b;
}
public uint8 ired () { return ((uint8) ((rr * 255.0) + 0.5)); }
public uint8 igreen () { return ((uint8) ((gg * 255.0) + 0.5)); }
public uint8 iblue () { return ((uint8) ((bb * 255.0) + 0.5)); }
public rgb () {
rr = (float) 0.0;
gg = (float) 0.0;
bb = (float) 0.0;
}
}
abstract class dop : Object {
public uint uid; // op uid
public string[] dat; // the data
public string[] hed; // headers
public int hsz; // header property count
public uint[] src; // source operator uids
public string[] arg; // args for the op function
public string[] asp; // arg spec
public int asz; // arg pair count, derived from spec
public int aln; // arg total length
public string[] prp; // properties
public int[] dcc; // data cached char counts, used for rendering
public int dsz; // data columns
public string mtp; // mask type
public int[] msk; // mask indices
public int getopindexbyuid (uint u) {
for (int i = 0; i < ops.length; i++) {
if (ops[i].uid == u) { return i; }
}
return -1;
}
public int getmemyindex () {
for (int i = 0; i < ops.length; i++) {
if (ops[i].uid == uid) { return i; }
}
return -1;
}
public void srcappendbyindex (int o) {
src += ops[o].uid;
}
// HEADERS
// hed[] is an unrolled array of 5 cols (hsz) by n rows, where n is the number of columns in dat (dsz)
// cols are: 0 uid, 1 name, 2 type, 3 source op uid, 4 source column uid
// hed is not labelled and doesn't use name-based getting and setting, as the spec is so basic
// headers should be initialized by ops that create data, propagated by ops that modify data, joined by ops that combine data
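// e.g. for a two-column op the layout looks like this (uids illustrative only):
//   hed = { "3131950253","RECEIVED_DATE","STRING","","",
//           "2795938003","JOB_TITLE","STRING","","" }
// so hed[(c*hsz)+1] is the name of column c, as hedreadname() below assumes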
public int hedcount () {
print("hed.length is %d, hsz is %d\n",hed.length,hsz);
return (hed.length / hsz);
}
public int hedlength () { return hed.length; }
public string hedread (int i) { return hed[i]; }
public void hedsetuid (int c) {
// c is column index
// assumes hed name is written
int i = c * hsz;
int x = getmemyindex();
uint u = "%d_%s_%d_%lld".printf(c,hed[i+1],x,GLib.get_monotonic_time()).hash();
hed[i] = "%u".printf(u);
}
public string hedreadname (int c) { return hed[(c*hsz)+1]; }
public void hedsetname (int c, string s) {
// c is column index
int hc = hed.length / hsz;
if (c < hc) {
int a = c * hsz; // hed index at row c
int b = a + 1; // hed row[1] is name
hed[b] = s;
}
}
public void hedmakebynames (string[] n) {
hed.resize(0);
for (int i = 0; i < n.length; i++) {
hed += "";
hed += n[i];
hed += "STRING";
hed += "";
hed += "";
int c = ((hed.length - 1) / hsz);
hedsetuid(c);
}
}
public int hedgetcolbyuid (uint u) {
// returns header column index
if (u == 0) { return -1; }
// header uid is item 0 of each header
for (int i = 0; i < hed.length; i += hsz) {
uint hu = uint.parse(hed[i]);
if ((hu != 0) && (hu == u)) {
return (i / hsz);
}
}
return -1;
}
public int hedgetcolbyname (string n, bool nc) {
// returns header column index
if (n.char_count() == 0) { return -1; }
// header name is item 1 of each header
for (int i = 1; i < hed.length; i += hsz) {
string hn = "";
if (nc) {
hn = hed[i].up();
if (strcmp(n.up(),hn) == 0) { return (i / hsz); }
} else {
hn = hed[i];
if (strcmp(n,hn) == 0) { return (i / hsz); }
}
}
return -1;
}
public void hedstream (string[] s) {
// write an array to hed
// generates header uid if s[1] is less than 2 chars
// will bail to avoid corrupt headers
if (s.length != hsz) {
int x = getmemyindex();
print("ERROR hedstream ops[%d].supplied array %s was malformed, please make sure the string[] supplied to hedstream conforms to the dop.hed[] spec: {uid,name,type,sourceopuid,sourcecolumnuid}\n",x,printstringarray(s,2));
assert(false);
}
for (int i = 0; i < hsz; i++) {
hed += s[i];
}
if (s[0].char_count() < 2) {
// probably not a valid uid
int c = ((hed.length - 1) / hsz);
hedsetuid(c);
}
}
public string[] hedcopy (int p) {
// returns an array of header properties: {uid,name,type,src_op_uid,src_col_uid}
// p is a column index
// this will segfault if ((p * hsz) + hsz) is out of bounds
string[] o = {};
int a = p * hsz;
int b = a + hsz;
for (int i = a; i < b; i++) {
o += hed[i];
}
return o;
}
public void hedcopyfrom (int o) {
// overwrite header using a source header
// assumes this operator's dat has been filled
// handles dimension mismatch
hed.resize(0);
int scc = ops[o].datcolcount();
int shc = ops[o].hedcount();
for (int i = 0; i < dsz; i++) {
if ((i < shc) && (i < scc)) {
int a = i * hsz;
int b = a + hsz;
for (int h = a; h < b; h++) {
hed += ops[o].hedread(h);
}
if (ops[o].hedread(a).char_count() < 2) {
// probably not a valid uid
int c = ((hed.length - 1) / hsz);
hedsetuid(c);
}
} else {
// column is either out of source bounds, or out of source header bounds
hedstream({"","NULL","STRING","",""});
// give the header a uid
int c = ((hed.length - 1) / hsz);
hedsetuid(c);
}
}
}
public void hedinitialize () {
// placeholder header labels
hed.resize(0);
for (int i = 0; i < dsz; i++) {
string o = "";
int d = i+1; // shift to 1-based for spreadsheet style naming
while (d > 0) {
int x = ((d - 1) % 26);
o = "%c%s".printf((x+65),o);
d = ((d - x) - 1) / 26;
}
int x = getmemyindex();
uint u = "%d_%s_%d_%lld".printf(i,o,x,GLib.get_monotonic_time()).hash();
hed += "%u".printf(u); // uid
hed += "%d %s".printf(i,o); // name
hed += "STRING"; // type
hed += ""; // source op id (for referenced lists)
hed += ""; // source column id (for referenced lists)
}
}
public void datsetcolcount (int c) { dsz = c; }
public int datlength () { return dat.length; }
public int datcolcount () { return dsz; }
public int datrowcount () { return dat.length / dsz; }
public string datreadcel (int i) { return dat[i]; }
public string datreadcelatrc(int r, int c) {
return dat[((r*dsz)+c)];
}
public void datwritecelatrc(int r, int c, string s) {
StringBuilder sb = new StringBuilder("");
sb.append(s);
int i = ((r*dsz)+c);
dat[i] = sb.str;
dcc[i] = sb.str.char_count();
}
public unichar datcelcharatpos (int c, int p) {
return dat[c].get_char(dat[c].index_of_nth_char(p));
}
public int datcelcharcountatrc (int r, int c, bool k) {
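// k caches the fresh count into dcc[] as well as returning it (datcelcharcount below behaves the same way)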
int a = (r * dsz) + c;
if (k) {
dcc[a] = dat[a].char_count(); return dcc[a];
} else {
return dat[a].char_count();
}
}
public int datcelcharcount (int i, bool k) {
if (k) {
dcc[i] = dat[i].char_count(); return dcc[i];
} else {
return dat[i].char_count();
}
}
public bool datisin (string s) {
return (s in dat);
}
public void datload(string[] s, bool hh) {
// hh is hasheader, move 1st row to header
dat.resize(0);
int sl = s.length;
int firstrow = 0;
if (hh) { firstrow = dsz; }
for (int i = firstrow; i < sl; i++) {
StringBuilder sb = new StringBuilder("");
sb.append(s[i]);
dat += sb.str.dup();
}
if (hh) {
hed.resize(0);
for (int i = 0; i < dsz; i++) {
StringBuilder sb = new StringBuilder("");
sb.append(s[i]);
string hs = sb.str.dup();
string o = "";
int d = i+1; // shift to 1-based
while (d > 0) {
int x = ((d - 1) % 26);
o = "%c%s".printf((x+65),o);
d = ((d - x) - 1) / 26;
}
hed += "(%s) %s".printf(o,hs.up());
}
}
}
public void datcachecc () {
// cache cell char counts in dcc
// used by various operations
// not masked
int64 tts = GLib.get_monotonic_time();
dcc.resize(dat.length);
for (int i = 0; i < dcc.length; i++) { dcc[i] = 0; }
int opindex = getmemyindex ();
int[] celranges = rangeseq((nthr * 2), dat.length);
int packetscomplete = 0;
ThreadPool ccc = new ThreadPool.with_owned_data(
(celcharcounter) => {
celcharcounter.run(ref packetscomplete);
},nthr,false
);
// ccc.set_max_idle_time(1000);
int rowcountoffset = 0;
int n = 0;
for (int x = 0; x < celranges.length; x += 2) {
//print("datcachecc task %d cells %d to %d of %d...\n",n,celranges[x],celranges[x+1],(dat.length - 1));
ccc.add(
new celcharcounter(
n,
opindex,
celranges[x],
celranges[x+1]
)
);
n += 1;
}
//print("datcachecc farmed %d tasks\n",n);
while (packetscomplete != n) { GLib.Thread.usleep(1); }
int64 tte = GLib.get_monotonic_time();
print("ops[%d].datcachecc took %s\n".printf(opindex,printusecs(tte-tts)));
}
public void datprintrange (string m, int rf, int rt) {
int64 tts = GLib.get_monotonic_time();
int opindex = getmemyindex();
if (dcc.length != dat.length) { datcachecc(); }
int[] colcharcounts = new int[dsz];
for (int i = 0; i < dsz; i++) { colcharcounts[i] = 0; }
int rowcount = dat.length / dsz;
int rowfrom = 0;
int rowto = 0;
int colfrom = 0;
int colto = 0;
if (strcmp(m,"ROWS") == 0) {
rowfrom = rf;
rowto = rt;
colfrom = 0;
colto = dsz - 1;
// fix bad input, handle -1 range as max range
rowfrom = int.min(rowfrom,(rowcount - 1));
rowfrom = int.max(rowfrom,0);
rowto = int.min(rowto,(rowcount - 1));
if (rowto < 0) { rowto = rowcount - 1; }
if (rowfrom > rowto) { rowfrom = rowto; }
} else {
if (strcmp(m,"COLUMNS") == 0) {
rowfrom = 0;
rowto = rowcount - 1;
colfrom = rf;
colto = rt;
colfrom = int.min(colfrom,(dsz - 1));
colfrom = int.max(colfrom,0);
colto = int.min(colto,(dsz - 1));
if (colto < 0) { colto = dsz - 1; }
if (colfrom > colto) { colfrom = colto; }
}
}
// get column widths
// header name index is (col * headerpropertycount) + 1
// cell data index is ((row * colcount) + col)
for (int r = rowfrom; r <= rowto; r++) {
for (int c = colfrom; c <= colto; c++) {
int i = ((r * dsz) + c);
colcharcounts[c] = int.max(colcharcounts[c],dcc[i]);
}
}
for (int c = colfrom; c <= colto; c++) {
colcharcounts[c] = int.max(colcharcounts[c], hed[(c*hsz)+1].char_count());
}
// print
string p = "";
for (int n = colfrom; n <= colto; n++) {
string dlm = " | "; if (n == 0) { dlm = "| "; }
p = "%s%s%-*s".printf(p,dlm,colcharcounts[n],hed[(n*hsz)+1]);
}
p = "%s%s\n".printf(p," |");
for (int r = rowfrom; r <= rowto; r++) {
int x = 0;
for (int c = colfrom; c <= colto; c++) {
int n = ((r*dsz)+c);
string dlm = " | "; if (x == 0) { dlm = "| "; }
string datpart = "%-*s".printf(colcharcounts[x],dat[n]);
p = "%s%s%s".printf(p,dlm,datpart);
x += 1;
}
p = "%s%s\n".printf(p," |");
}
print(p);
int64 tte = GLib.get_monotonic_time();
print("ops[%d].datprintrc took %s\n".printf(opindex,printusecs(tte-tts)));
}
public int dcclength() { return dcc.length; }
public int dccget (int i) {
return dcc[i];
}
public int dccreadcelatrc (int r, int c) {
int i = ((r * dsz) + c);
return dcc[i];
}
// PROPERTIES
// prp[] is a label,value list
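// e.g. opcollate below initializes prp = { "IGNORECASE","FALSE", "USEFILTERS","TRUE" },
// so prpnget("USEFILTERS") returns "TRUE" and prpnput("IGNORECASE","TRUE") overwrites prp[1]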
public string prpnget (string n) {
int pln = prp.length;
for (int i = 0; i < pln; i += 2) {
if (strcmp(prp[i],n) == 0) {
return prp[i+1];
}
}
return "UNKNOWN_PRP %s".printf(n);
}
public void prpnput (string n, string v) {
int pln = prp.length;
for (int i = 0; i < pln; i += 2) {
if (strcmp(prp[i],n) == 0) {
prp[i+1] = v;
return;
}
}
int idx = getmemyindex();
print("ops[%d].prpnput critical data error: property %s not found\n",idx,n);
}
// ARGS
// arg[] is unrolled rows of multiple args
// stored here because vala won't allow access to subclass variables without casting the
// object to its subclass type every time a variable is read or changed,
// eg: opread r = (opread) ops[2]; r.filename = "./somefile.txt"
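// layout example: with an arg spec of just {"FILENAME","MAXROWS"} (asz 2, aln 4) one filled row is
//   arg = { "FILENAME","./somefile.txt", "MAXROWS","-1" }
// so argnget(0,"MAXROWS") scans the label cells of row 0 and returns "-1"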
public int argcount () { return asz; }
public int arglength () { return aln; }
public int argrowcount () { return (arg.length / aln); }
public void argput (int p, int n, string s) {
int a = ((p * asz) * 2) + ((n * 2) + 1);
arg[a] = s;
}
void argnewrow () {
for (int i = 0; i < asz; i++) {
arg += asp[i]; arg += "";
}
}
public string argget (int p, int n) {
int a = ((p * asz) * 2) + ((n * 2) + 1);
return arg[a];
}
public string argnget (int p, string n) {
int a = ((p * asz) * 2);
for (int i = a; i < (a + aln); i += 2) {
if (strcmp(arg[i],n) == 0) {
return arg[i+1];
}
}
return "UNKNOWN_ARG %s".printf(n);
}
public void argnput (int r, string n, string v) {
// this function will intentionally crash the program if r exceeds row count -1
// or the arg label n is not found.
//
// r is arg row
// if r is -1, or r is 0 and arg[] is empty, append a new arg row
int p = r;
if ((r == 0) && (arg.length == 0)) {
argnewrow();
p = 0;
}
if (r == -1) {
argnewrow();
p = (arg.length / aln) - 1;
} else {
if ((r >= (arg.length / aln)) || (r < 0)) {
int idx = getmemyindex();
print("ops[%d].argnput critical arg error: row %d is out of bounds\n",idx,r);
assert(false);
}
}
// a is 0th cell of row p
int a = (p * aln);
//print("argnget cell 0 of row %d is %d of %d cells per row\n",p,(a+1),aln);
for (int i = a; i < (a + aln); i += 2) {
//print("\tchecking label in arg[%d] {%s}...\n",i,arg[i]);
if (strcmp(arg[i],n) == 0) {
arg[i+1] = v;
//print("\t\targ[%d] is {%s}, arg[%d] is {%s}\n",i,arg[i],(i+1),arg[i+1]);
return;
}
}
int idx = getmemyindex();
print("ops[%d].argnput critical arg error: arg %s not found in row %d\n",idx,n,r);
assert(false);
}
public string argreport (int ind, bool rnm, bool cnm, bool h, int m, rgb? hc) {
// arg is the string[]
// asz is column count
// h is render header
// rnm is render row numbers
// cnm is render cell numbers
// m is column to highlight
// hc is rgb object for highlight, or null if not required
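// e.g. opcollate.eval calls argreport(0,true,true,true,-1,null),
// which renders the single-row arg table shown under #+RESULTS: below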
string tabs = ("%*s").printf(ind," ").replace(" ","\t");
string spc = " ";
int chc = 0;
bool domask = (m > -1);
int rcols = asz;
int rowcount = arg.length / aln;
int firstcol = 0;
string dlm = " | ";
uint8 ir = ((uint8) (255 * 0.4));
uint8 ig = ((uint8) (255 * 0.2));
uint8 ib = ((uint8) (255 * 0.2));
if (hc != null) {
ir = hc.ired();
ig = hc.igreen();
ib = hc.iblue();
}
//print("highlight color is uint8(%d,%d,%d)\n",ir,ig,ib);
if (rnm) {
firstcol = 1;
rcols = asz+1;
}
// cex is the char count required for cell numbers if requested
int cex = 0;
if (cnm) {
chc = "%d".printf(asz).char_count();
cex = chc + 3;
}
int[] cw = new int[rcols];
// get char counts of header 1st
for (int i = 0; i < cw.length; i++) { cw[i] = 0; }
if (rnm) { cw[0] = "%d".printf(rowcount).char_count(); }
if (h) {
for (int i = 0; i < asz; i++) {
cw[i+firstcol] = arg[i*2].char_count();
}
}
// get char counts of arg values
for (int i = 1; i < arg.length; i += 2) {
int c = (i % aln) / 2;
if (rnm) { c = c + 1; }
int cc = 0;
if (arg[i] != null) {
cc = arg[i].char_count();
if (cc > 0) {
cc = cc + cex;
int nwlat = arg[i].index_of("\n");
if (nwlat != -1) {
cc = (nwlat + 1 + cex);
}
}
}
cw[c] = int.max(cw[c],cc);
}
//print("column widths are %s\n\n",printintarray(cw,2));
// print
string o = "";
string ln = "";
// print header
if (h) {
for (int i = 0; i < cw.length; i++) {
dlm = " | ";
if (rnm && (i == 0)) {
dlm = "%s| ".printf(tabs);
string hh = "#";
o = "%s%s%-*s".printf(o,dlm,cw[i],hh);
} else {
if (i == 0) { dlm = "%s| ".printf(tabs); }
string hh = "";
hh = "%s".printf(arg[(i-firstcol)*2]);
o = "%s%s%-*s".printf(o,dlm,cw[i],hh);
}
if (i == (cw.length - 1)) { o = "%s |\n".printf(o); }
}
// print line
for (int i = 0; i < cw.length; i++) {
dlm = "-+-";
if (i == 0) { dlm = "%s|-".printf(tabs); }
ln = "%s%s%-*s".printf(ln,dlm,cw[i]," ").replace(" ","-");
if (i == (cw.length - 1)) { ln = "%s-|\n".printf(ln); }
}
o = "%s%s".printf(o,ln);
}
// print rows
int rcc = dsz + 1;
for (int r = 0; r < rowcount; r++) {
for (int c = 0; c < cw.length; c++) {
string datpart = " ";
if ((c == 0) && rnm) {
dlm = "%s| ".printf(tabs);
o = "%s%s%0*d".printf(o,dlm,cw[0],r);
} else {
dlm = " | ";
if (c == 0) { dlm = "%s| ".printf(tabs); }
int j = (((r * asz) + (c - firstcol)) * 2) + 1;
int cc = 0;
if (arg[j] != null) {
cc = arg[j].char_count(); // count only after the null check
if (cc > 0) {
datpart = arg[j];
int nwlat = arg[j].index_of("\n");
if (nwlat != -1) {
//trunc = true;
datpart = (arg[j].substring(0,nwlat) + "…");
cc = (nwlat + 1);
}
} else {
cc = int.max(cc,0);
}
}
string clen = "";
if (cnm) { clen = "(%0*d) ".printf(chc,j); }
int padlen = (cw[c] - cc) - clen.char_count();
padlen = int.max(padlen,0);
datpart = "%s%s%.*s".printf(clen,datpart,padlen,spc);
if (domask) {
bool ismasked = ((m+firstcol) == c);
if (ismasked) {
datpart = "\x1b[48;2;%u;%u;%um%s\x1b[0m".printf(ir,ig,ib,datpart);
//datpart = "\x1b[48;2;76;25;25m%s\x1b[0m".printf(datpart);
}
}
o = "%s%s%s".printf(o,dlm,datpart);
if (c == (cw.length - 1)) { o = "%s |\n".printf(o); }
}
}
}
cw = null;
return o;
}
public abstract void eval ();
}
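// minimal sketch of a dop subclass (illustrative only, not used by main): every operator
// initializes the arrays the base class declares, derives asz/aln from its arg spec,
// hashes itself a uid, then implements eval()
class opnoop : dop {
public opnoop () {
dat = {}; dsz = 1;
hed = {}; hsz = 5;
asp = {}; arg = {};
asz = asp.length; aln = asz * 2;
prp = {}; src = {}; msk = {}; dcc = {};
mtp = "NONE";
uid = "%s_%d_%lld".printf("dat",ops.length,GLib.get_monotonic_time()).hash();
}
public override void eval () {
// a real operator fills dat[], sets dsz, then rebuilds hed[] (see opreadtext/opcollate)
}
}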
class filterrowsbycol : Object {
// updates a list of boolean values per row
// later used to filter an operation
private int thispacket;
private int firstrow;
private int lastrow;
private int colcount;
private bool filterignorecase;
private string filterwhat;
private string filterif;
private int filterlevel;
private int sop;
private int a;
public filterrowsbycol (int n, int fr, int lr, int cc, bool fic, string fwh, string fif, int flv, int sx, int ax) {
thispacket = n;
firstrow = fr;
lastrow = lr;
colcount = cc;
filterignorecase = fic;
filterwhat = fwh;
filterif = fif;
filterlevel = flv;
sop = sx;
a = ax;
}
public void eval(int[] incols, ref bool[] brow, ref int packetscomplete) {
for (int r = firstrow; r <= lastrow; r++) {
bool fm = false;
foreach (int c in incols) {
if (fm) { break; } // filter found something, stop checking columns
int i = (r * colcount) + c;
if (strcmp(filterif,"IS") == 0) {
if (filterignorecase) {
fm = (strcmp(ops[sop].datreadcel(i).up(),filterwhat.up()) == 0);
} else {
fm = (strcmp(ops[sop].datreadcel(i),filterwhat) == 0);
}
}
if (strcmp(filterif,"CONTAINS") == 0) {
if (filterignorecase) {
fm = (ops[sop].datreadcel(i).up().index_of(filterwhat.up()) > -1);
} else {
fm = (ops[sop].datreadcel(i).index_of(filterwhat) > -1);
}
}
if (strcmp(filterif,"STARTSWITH") == 0) {
if (filterignorecase) {
fm = (ops[sop].datreadcel(i).up().index_of(filterwhat.up()) == 0);
} else {
fm = (ops[sop].datreadcel(i).index_of(filterwhat) == 0);
}
}
if (strcmp(filterif,"ENDSWITH") == 0) {
if (filterignorecase) {
fm = ops[sop].datreadcel(i).up().has_suffix(filterwhat.up());
} else {
fm = ops[sop].datreadcel(i).has_suffix(filterwhat);
}
}
if (strcmp(filterif,"GREATERTHAN") == 0) {
if (filterignorecase) {
string aa = ops[sop].datreadcel(i).up();
string bb = filterwhat.up();
fm = (strcmp(bb,aa) < 0); // match when the cell aa sorts after filterwhat bb, as in the case-sensitive branch
} else {
fm = (strcmp(filterwhat,ops[sop].datreadcel(i)) < 0);
}
}
if (strcmp(filterif,"LESSTHAN") == 0) {
if (filterignorecase) {
string aa = ops[sop].datreadcel(i).up();
string bb = filterwhat.up();
fm = (strcmp(bb,aa) > 0); // match when the cell aa sorts before filterwhat bb, as in the case-sensitive branch
} else {
fm = (strcmp(filterwhat,ops[sop].datreadcel(i)) > 0);
}
}
} // end of column loop
if (a > 0) {
if (int.parse(ops[sop].argnget((a-1),"LEVEL")) != filterlevel) {
// start a new boolean composite whenever the level changes
brow[r] = fm;
} else {
int k = a-1;
if (
(a < (ops[sop].argrowcount() - 1)) &&
(filterlevel != int.parse(ops[sop].argnget((a+1),"LEVEL")))
) {
// we're last in the level, compare with the last of the previous level
for (int j = (a-1); j >= 0; j--) {
if (filterlevel != int.parse(ops[sop].argnget(j,"LEVEL"))) {
k = j; break;
}
}
}
// perform the requested boolean operation
if (ops[sop].argnget(k,"ANDORNOT") == "AND") {
brow[r] = (brow[r] && fm);
} else {
if (ops[sop].argnget(k,"ANDORNOT") == "OR") {
brow[r] = (brow[r] || fm);
} else {
if (ops[sop].argnget(k,"ANDORNOT") == "AND NOT") {
brow[r] = (brow[r] && !fm);
} else {
if (ops[sop].argnget(k,"ANDORNOT") == "OR NOT") {
brow[r] = (brow[r] || !fm);
}
}
}
}
}
} else { brow[r] = fm; }
} // end of row loop
mutex.lock();
packetscomplete += 1;
mutex.unlock();
}
}
class opcollate : dop {
public opcollate () {
arg = {};
dat = {};
dsz = 1;
asp = {};
arg = {};
asz = 1;
prp = {};
hed = {};
hsz = 5;
src = {};
msk = {};
dcc = {};
mtp = "NONE";
uid = "%s_%d_%lld".printf("dat",ops.length,GLib.get_monotonic_time()).hash();
// op properties
prp = {
"IGNORECASE", "FALSE",
"USEFILTERS", "TRUE"
};
asp = {
"FILTERWHAT",
"FILTERIF",
"FILTERIGNORECASE",
"FILTERLEVEL",
"FILTERHOW",
"FILTERANDOR",
"WHATROW",
"ROWNUMBER",
"ROWNUMBERS",
"ROWRANGEFROM",
"ROWRANGETO",
"ROWNTH",
"ROWNTHOFFSET",
"ROWNTHINRANGE",
"WHATCOL",
"COLNUMBER",
"COLNUMBERS",
"COLRANGEFROM",
"COLRANGETO",
"COLNTH",
"COLNTHOFFSET",
"COLNTHINRANGE",
"COLHEADERIDS",
"COLHEADERSBYNAMES",
"COLHEADERNAMES",
"COLHEADERIGNORECASE",
};
asz = asp.length;
aln = asz * 2;
argnewrow();
print("opcollate initializing parameters...\n");
argnput(0,"FILTERHOW","ROWS BY COLUMNS");
argnput(0,"WHATCOL","NUMBER");
argnput(0,"COLNUMBER","0");
argnput(0,"FILTERANDOR","AND");
argnput(0,"FILTERLEVEL","0");
argnput(0,"FILTERIF","IS");
print("opcollate initialized.\n");
}
public override void eval () {
int64 tts = GLib.get_monotonic_time();
print("opcollate evaluating...\n");
// collate data using column headers, prioritizing A over B for column order
// unique columns in B are appended, otherwise data filed under existing columns
//
// A COLUMN | 00 | 01 | 02 | 03 |
// A HEADER | AA | CC | DD | FF |
//
// B COLUMN | 00 | 01 | 02 |
// B HEADER | BB | DD | AA |
// B TARGET | 04 | 02 | 00 |
//
// C COLUMN | 00 | 01 | 02 | 03 | 04 |
// C HEADER | AA | CC | DD | FF | BB |
// C DATA | a | a | a | a | |
// | b | | b | | b |
int opsrclen = src.length;
if (opsrclen == 0) {
// nothing to do
print("opcollate has nothing to do.\n");
dat.resize(0);
dsz = 0;
return;
};
bool usefilter = (strcmp(prpnget("USEFILTERS"),"TRUE") == 0);
bool ignorecase = (strcmp(prpnget("IGNORECASE"),"TRUE") == 0);
// tc is target cols per source
// tc[2].items {4,0,1,2} files 3rd source data columns 0,1,2,3 to output columns 4,0,1,2
print("opcollate making source packets...\n");
packet[] tc = new packet[opsrclen];
print("opcollate fetching source op index from source[0] of %d soruces...\n",src.length);
int sop = getopindexbyuid(src[0]);
print("opcollate counting headers in op[%d]...\n",sop);
int shl = ops[sop].hedcount();
print("opcollate counted %d headers in op[%d].\n",shl,sop);
// maxcols becomes the output data total column count .dsz
// allheaders becomes output headers .hed
int maxcols = shl;
string[] allheaders = {};
// initialize headers using 1st source
print("opcollate copying headers from op %d...\n",sop);
for (int i = 0; i < shl; i++) {
tc[0].items += i;
allheaders += ops[sop].hedreadname(i);
}
print("opcollate iterating over sources...\n");
// must be collected per source to compute ranges
packet[] filterrows = new packet[opsrclen];
packet[] filtercols = new packet[opsrclen];
int argc = arg.length / aln;
print("opcollate args are:\n%s",argreport(0,true,true,true,-1,null));
print("opcollate asz is %d, aln is %d, arg.length is %d, arg count is %d\n",asz,aln,arg.length,argc);
for (int s = 0; s < opsrclen; s++) {
sop = getopindexbyuid(src[s]);
print("opcollate getting dimensions of source[%d] op[%d]...\n",s,sop);
int rowcount = ops[sop].datrowcount();
int colcount = ops[sop].datcolcount();
print("opcollate source[%d] op[%d] has %d rows, %d columns.\n",s,sop,rowcount,colcount);
// filters are applied per source
// they are useful for collating a subset of data in sources, for example:
// - every 100th row of each source
// - the 1st, 4th and 5th columns of each source
// - rows with "2023" in column "DATE" of each source
// - rows with "NULL" in any cell
if (usefilter) {
print("opcollate src[%d]...\n",s);
// booleans per row/col, used for compound filters
bool[] brow = new bool[rowcount];
bool[] bcol = new bool[colcount];
for (int i = 0; i < brow.length; i++) { brow[i] = false; }
for (int i = 0; i < bcol.length; i++) { bcol[i] = false; }
bool keepcols = true;
// loop through filters
print("opcollate src[%d] processing %d filters...\n",s,argc);
for (int a = 0; a < argc; a++) {
// skip malformed filters
print("opcollate src[%d] gathering args from filter %d of %d filters...\n",s,(a+1),argc);
string filterwhat = argnget(a,"FILTERWHAT");
if (filterwhat.char_count() == 0) { continue; }
bool filterignorecase = (strcmp(argnget(a,"FILTERIGNORECASE"),"TRUE") == 0);
int filterlevel = int.parse(argnget(a,"FILTERLEVEL"));
string filterif = argnget(a,"FILTERIF");
// gather column list
print("opcollate src[%d] filter %d \"FILTERHOW\" is %s\n",s,a,argnget(a,"FILTERHOW"));
if (strcmp(argnget(a,"FILTERHOW"),"ROWS BY COLUMNS") == 0) {
int[] incols = {};
string filterbywhatcol = argnget(a,"WHATCOL");
print("opcollate src[%d] filter %d is filtering by %s\n",s,a,filterbywhatcol);
if (strcmp(filterbywhatcol,"NUMBER") == 0) {
int fc = int.parse(argnget(a,"COLNUMBER"));
fc = int.min((colcount - 1),fc);
fc = int.max(fc,0);
if ((fc in incols) == false) { incols += fc; }
}
if (strcmp(filterbywhatcol,"NUMBERS") == 0) {
string[] filterbycolnumbers = argnget(a,"COLNUMBERS").split(";");
for (int i = 0; i < filterbycolnumbers.length; i++) {
int fc = int.parse(filterbycolnumbers[i]);
fc = int.min((colcount - 1),fc);
fc = int.max(fc,0);
if ((fc in incols) == false) { incols += fc; }
}
}
if (strcmp(filterbywhatcol,"RANGE") == 0) {
int rf = int.parse(argnget(a,"COLRANGEFROM"));
int rt = int.parse(argnget(a,"COLRANGETO"));
rf = int.min((colcount - 1),rf);
rf = int.max(rf,0);
rt = int.min((colcount - 1),rt);
rt = int.max(rf,rt);
for (int i = rf; i <= rt; i++) {
if ((i in incols) == false) { incols += i; }
}
}
if (strcmp(filterbywhatcol,"NTH") == 0) {
int nth = int.parse(argnget(a,"COLNTH"));
nth = int.max(nth,1);
int ofs = int.parse(argnget(a,"COLNTHOFFSET"));
ofs = int.max(ofs,0);
if (strcmp(argnget(a,"COLNTHINRANGE"),"TRUE") == 0) {
int rf = int.parse(argnget(a,"COLRANGEFROM"));
int rt = int.parse(argnget(a,"COLRANGETO"));
rf = int.min((colcount - 1),rf);
rf = int.max(rf,0);
rt = int.min((colcount - 1),rt);
rt = int.max(rf,rt);
for (int i = (rf+ofs); i <= rt; i += nth) {
if ((i in incols) == false) { incols += i; }
}
} else {
for (int i = ofs; i < colcount; i += nth) {
if ((i in incols) == false) { incols += i; }
}
}
}
if (strcmp(filterbywhatcol,"HEADERS") == 0) {
//print("opcollate src[%d] filter %d COLHEADERSBYNAMES is %s\n",s,a,argnget(a,"COLHEADERSBYNAMES"));
if (strcmp(argnget(a,"COLHEADERSBYNAMES"),"TRUE") == 0) {
string[] filtercolheadernames = argnget(a,"COLHEADERNAMES").split(";");
//print("opcollate src[%d] filter %d col header names are %s\n",s,a,printstringarray(filtercolheadernames,2));
for (int i = 0; i < filtercolheadernames.length; i++) {
int fc = ops[sop].hedgetcolbyname(
filtercolheadernames[i],
filterignorecase
);
//print("opcollate src[%d] filter %d header %s is in column %d\n",s,a,filtercolheadernames[i],fc);
if (fc > -1) {
fc = int.min((colcount - 1),fc);
if ((fc in incols) == false) { incols += fc; }
}
}
} else {
string[] filtercolheaderuids = argnget(a,"COLHEADERIDS").split(";"); // COLHEADERIDS is the label declared in asp[]
for (int i = 0; i < filtercolheaderuids.length; i++) {
int fc = ops[sop].hedgetcolbyuid(
uint.parse(filtercolheaderuids[i])
);
if (fc > -1) {
fc = int.min((colcount - 1),fc);
if ((fc in incols) == false) { incols += fc; }
}
}
}
}
// multithread filtering if row count is over threadcount * 4
int packetscomplete = 0;
if (rowcount > (nthr * 4)) {
int[] rowranges = rangeseq((nthr * 2), rowcount);
ThreadPool fbc = new ThreadPool.with_owned_data(
(filterrowsbycol) => {
filterrowsbycol.eval(
incols,
ref brow,
ref packetscomplete
);
},nthr,false
);
int n = 0;
for (int x = 0; x < rowranges.length; x += 2) {
fbc.add(
new filterrowsbycol(
n,
rowranges[x],
rowranges[x+1],
colcount,
filterignorecase,
filterwhat,
filterif,
filterlevel,
sop,
a
)
);
n += 1;
}
//print("datcachecc farmed %d tasks\n",n);
while (packetscomplete != n) { GLib.Thread.usleep(1); }
} else {
filterrowsbycol fbc = new filterrowsbycol(
0,
0,
(rowcount - 1),
colcount,
filterignorecase,
filterwhat,
filterif,
filterlevel,
sop,
a
);
fbc.eval(
incols,
ref brow,
ref packetscomplete
);
}
} else {
// filter columns by rows here
keepcols = false; // let bcol[] (set below) decide which columns are collected
int[] inrows = {};
string filterbywhatrow = argnget(a,"WHATROW");
print("opcollate src[%d] filter %d is filtering by %s\n",s,a,filterbywhatrow);
if (strcmp(filterbywhatrow,"NUMBER") == 0) {
int fc = int.parse(argnget(a,"ROWNUMBER"));
fc = int.min((rowcount - 1),fc);
fc = int.max(fc,0);
if ((fc in inrows) == false) { inrows += fc; }
}
if (strcmp(filterbywhatrow,"NUMBERS") == 0) {
string[] filterbyrownumbers = argnget(a,"ROWNUMBERS").split(";");
for (int i = 0; i < filterbyrownumbers.length; i++) {
int fc = int.parse(filterbyrownumbers[i]);
fc = int.min((rowcount - 1),fc);
fc = int.max(fc,0);
if ((fc in inrows) == false) { inrows += fc; }
}
}
if (strcmp(filterbywhatrow,"RANGE") == 0) {
int rf = int.parse(argnget(a,"ROWRANGEFROM"));
int rt = int.parse(argnget(a,"ROWRANGETO"));
rf = int.min((rowcount - 1),rf);
rf = int.max(rf,0);
rt = int.min((rowcount - 1),rt);
rt = int.max(rf,rt);
for (int i = rf; i <= rt; i++) {
if ((i in inrows) == false) { inrows += i; }
}
}
if (strcmp(filterbywhatrow,"NTH") == 0) {
int nth = int.parse(argnget(a,"ROWNTH"));
nth = int.max(nth,1);
int ofs = int.parse(argnget(a,"ROWNTHOFFSET"));
ofs = int.max(ofs,0);
if (strcmp(argnget(a,"ROWNTHINRANGE"),"TRUE") == 0) {
int rf = int.parse(argnget(a,"ROWRANGEFROM"));
int rt = int.parse(argnget(a,"ROWRANGETO"));
rf = int.min((rowcount - 1),rf);
rf = int.max(rf,0);
rt = int.min((rowcount - 1),rt);
rt = int.max(rf,rt);
for (int i = (rf+ofs); i <= rt; i += nth) {
if ((i in inrows) == false) { inrows += i; }
}
} else {
for (int i = ofs; i < rowcount; i += nth) {
if ((i in inrows) == false) { inrows += i; }
}
}
}
if (strcmp(filterbywhatrow,"HEADER") == 0) {
//print("opcollate src[%d] filter %d COLHEADERSBYNAMES is %s\n",s,a,argnget(a,"COLHEADERSBYNAMES"));
if (strcmp(argnget(a,"ROWHEADERSBYNAMES"),"TRUE") == 0) {
string[] filterrowheadernames = argnget(a,"ROWHEADERNAMES").split(";");
//print("opcollate src[%d] filter %d col header names are %s\n",s,a,printstringarray(filtercolheadernames,2));
for (int i = 0; i < filterrowheadernames.length; i++) {
int fc = ops[sop].hedgetcolbyname(
filterrowheadernames[i],
filterignorecase
);
//print("opcollate src[%d] filter %d header %s is in column %d\n",s,a,filtercolheadernames[i],fc);
if (fc > -1) {
inrows += -1; break;
}
}
} else {
string[] filterrowheaderuids = argnget(a,"ROWHEADERUIDS").split(";");
for (int i = 0; i < filterrowheaderuids.length; i++) {
int fc = ops[sop].hedgetcolbyuid(
uint.parse(filterrowheaderuids[i])
);
if (fc > -1) {
inrows += -1; break;
}
}
}
}
// multithread filtering if the selected row count is over threadcount * 4
int packetscomplete = 0;
bool checkheadersonly = ((inrows.length == 1) && (inrows[0] == -1));
if (!checkheadersonly && (inrows.length > (nthr * 4))) {
// compare column count to inrows[].length, switch processing methods accordingly:
// where rowcount greatly exceeds colcount:
// break inrows[] into packets, send each packet /per column/ to threaded filtercolsbyrow()
// set packet size to ensure total tasks are threadcount * column count,
// threadcount * 2 is normally optimal, but here we could have many tasks exiting after cumulative
// filter checks return true, while a few tasks remain on the queue for some time.
int[] colranges = rangeseq((nthr * 2), colcount); // TODO: replace with packseq() once it distributes indices more evenly
ThreadPool fbc = new ThreadPool.with_owned_data(
(filtercolsbyrow) => {
filtercolsbyrow.eval(
inrows,
ref bcol,
ref packetscomplete
);
},nthr,false
);
int n = 0;
for (int x = 0; x < colranges.length; x += 2) {
fbc.add(
new filtercolsbyrow(
n,
colranges[x],
colranges[x+1],
rowcount,
filterignorecase,
filterwhat,
filterif,
filterlevel,
sop,
a
)
);
n += 1;
}
//print("datcachecc farmed %d tasks\n",n);
while (packetscomplete != n) { GLib.Thread.usleep(1); }
} else {
filtercolsbyrow fbc = new filtercolsbyrow(
0,
0,
(colcount - 1),
rowcount,
filterignorecase,
filterwhat,
filterif,
filterlevel,
sop,
a
);
fbc.eval(
inrows,
ref bcol,
ref packetscomplete
);
}
} // end of cols by rows prep
} // end of filter loop
// collect row/col indices for this source using cumulative result of filters
if (keepcols) {
for (int i = 0; i < bcol.length; i++) { bcol[i] = true; }
for (int c = 0; c < colcount; c++) { filtercols[s].items += c; }
} else {
for (int c = 0; c < colcount; c++) { if (bcol[c]) { filtercols[s].items += c; } }
}
for (int r = 0; r < rowcount; r++) { if (brow[r]) { filterrows[s].items += r; } }
} // end of usefilter check
// update targets, headers & cumulative column count
print("opcollate src[%d] is collecting %d rows\n",s,filterrows[s].items.length);
print("opcollate src[%d] is collecting %d columns\n",s,filtercols[s].items.length);
foreach (int q in filtercols[s].items) {
string hh = ops[sop].hedreadname(q);
//print("\topcollate src[%d] col %d headername is %s\n",s,q,hh);
int hp = spick(allheaders,hh,ignorecase);
if (hp > -1) {
if ((hp in tc[s].items) == false) { tc[s].items += hp; }
} else {
if ((maxcols in tc[s].items) == false) {
tc[s].items += maxcols;
allheaders += hh;
maxcols += 1;
}
}
}
} // end of source loop
print("collated headers are:\n%s",printstringarray(allheaders,2));
print("opcollate populating dat...\n");
int lastrow = 0;
for (int s = 0; s < opsrclen; s++) {
sop = getopindexbyuid(src[s]);
//print("opcollate src[%d], %d rows, %d columns of %d max columns, writing from dat[%d]\n",s,filterrows[s].items.length,filtercols[s].items.length,maxcols,(lastrow * maxcols));
int rx = 0;
foreach (int r in filterrows[s].items) {
// write empties 1st
for (int c = 0; c < maxcols; c++) {
dat += "";
}
//print("\topcollate src[%d] row %d expanded dat to size %d\n",s,r,dat.length);
int scl = tc[s].items.length;
//print("\topcollate src[%d] row %d tc[%d].items.length is %d\n",s,r,s,scl);
for (int t = 0; t < scl; t++) {
// copy data
int ti = (lastrow * maxcols) + tc[s].items[t]; // target cell in this.dat
int si = (r * scl) + t; // source cell in ops[sop].dat
//print("\topcollate src[%d] at {%d,%d} cell %d targets {%d,%d} cell %d\n",s,r,t,si,lastrow,tc[s].items[t],ti);
dat[ti] = ops[sop].dat[si].dup();
}
//for (int i = 0; i < maxcols; i++) {
// print("\t\tdat[%d] is %s\n",((lastrow * maxcols) + i),dat[((lastrow * maxcols) + i)]);
//}
lastrow += 1;
rx += 1;
}
}
dsz = maxcols;
print("opcollate reconstructing headers...\n");
hedmakebynames(allheaders);
print("opcollate complete.\n");
int64 tte = GLib.get_monotonic_time();
print("opcollate took %s to collate %d rows %d cols\n",printusecs(tte-tts),(dat.length / dsz),dsz);
}
}
class opreadtext : dop {
private string filename { get; set; }
private string rowdelimiter;
private string coldelimiter;
private string quotestring;
private string escapestring;
private int[] columns;
private bool hasheader;
private int maxrows;
public opreadtext () {
arg = {};
dat = {};
dsz = 1;
hed = {};
hsz = 5;
prp = {};
src = {};
msk = {};
dcc = {};
mtp = "NONE";
uid = "%s_%d_%lld".printf("dat",ops.length,GLib.get_monotonic_time()).hash();
asp = {
"FILENAME",
"ROWDELIMITER",
"COLDELIMITER",
"QUOTESTRING",
"ESCAPESTRING",
"HASHEADER",
"COLUMNS",
"MAXROWS"
};
asz = asp.length;
aln = asz * 2;
foreach (string a in asp) { arg += a; arg += ""; }
// local params
filename = "";
rowdelimiter = "\n";
coldelimiter = "";
quotestring = "";
escapestring = "";
hasheader = true;
columns = {};
maxrows = -1;
}
public override void eval () {
int64 tts = GLib.get_monotonic_time();
// grab args
int opindex = getmemyindex();
filename = argnget(0,"FILENAME");
rowdelimiter = argnget(0,"ROWDELIMITER");
coldelimiter = argnget(0,"COLDELIMITER");
quotestring = argnget(0,"QUOTESTRING");
escapestring = argnget(0,"ESCAPESTRING");
maxrows = int.parse(argnget(0,"MAXROWS"));
hasheader = (strcmp(argnget(0,"HASHEADER"),"TRUE") == 0);
string[] colnums = argnget(0,"COLUMNS").split(";");
for (int i = 0; i < colnums.length; i++) {
if (isanumber(colnums[i])) {
columns += int.parse(colnums[i]);
}
}
bool nwl = (rowdelimiter == "\n");
print("ops[%d] opreadtext filename is .............. : %s\n",opindex,filename);
if (nwl) {
print("ops[%d] opreadtext row delimiter string is .. : \\n\n",opindex);
} else {
print("ops[%d] opreadtext row delimiter string is .. : %s\n",opindex,rowdelimiter);
}
print("ops[%d] opreadtext col delimiter string is .. : %s\n",opindex,coldelimiter);
print("ops[%d] opreadtext quote string is .......... : %s\n",opindex,quotestring);
print("ops[%d] opreadtext escape string is ......... : %s\n",opindex,escapestring);
if (hasheader) {
print("ops[%d] opreadtext hasheader is ............. : true\n",opindex);
} else {
print("ops[%d] opreadtext hasheader is ............. : false\n",opindex);
}
print("ops[%d] opreadtext column filter is ......... : %s\n",opindex,printintarray(columns,2));
// reset output
dat.resize(0);
dsz = 0;
// check filename
GLib.File ff = GLib.File.new_for_path(filename);
if (ff.query_exists() == false) { return; }
// stream readline into sbfile if row delimiter is newline
StringBuilder sbfile = new StringBuilder("");
if (nwl) {
// FileStream.open returns null rather than throwing, so check the handle directly
GLib.FileStream? fs = GLib.FileStream.open(filename,"r");
if (fs == null) {
print("ERROR ops[%d] opreadtext can't open file : %s\n",opindex,filename);
return;
}
while (true) {
string l = fs.read_line();
if (l == null) { break; }
sbfile.append(l.dup() + "%c".printf('\x1E'));
}
} else {
try {
uint8[] c; string ex;
ff.load_contents (null, out c, out ex);
sbfile.append((string) c);
} catch (Error e) {
print ("ERROR ops[%d] opreadtext failed to read file : %s\n",opindex,e.message);
return;
}
}
bool keepquotes = false; // keep quotes, make this an arg if required
string escapedquote = "";
unichar rdc = rowdelimiter.get_char(0);
unichar cdc = coldelimiter.get_char(0);
unichar qdc = quotestring.get_char(0);
unichar edc = escapestring.get_char(0);
char rc = '\x1E'; // ^^
char cc = '\x1F'; // ^_
char dd = '\x1D'; // ^]
char qq = '\x1C'; // ^\
string rds = "%c".printf(rc);
string cds = "%c".printf(cc);
string dds = "%c".printf(dd);
string qds = "%c".printf(qq);
bool hasquote = ((quotestring.char_count() > 0) && (sbfile.str.index_of_char(qdc) > -1));
bool hasescape = (escapestring.char_count() > 0);
bool hasescapedquote = (hasquote && hasescape);
// first replace escaped quotes with ccs, restore it after quote split
uint eqc = 0;
if (hasquote) {
if (hasescapedquote) {
escapedquote = (escapestring + quotestring);
eqc = sbfile.replace(escapedquote,qds,0);
}
//print("datreaddelimited found %u escaped quotes\n",eqc);
if ((eqc & 1) == 1) {
//print("datreaddelimited escape char {%s} is invalid or data is malformed, proceeding without escape...\n",ecs);
sbfile.replace(qds,escapedquote,0);
hasescape = false;
hasescapedquote = false;
eqc = 0;
}
string[] quotedsegs = sbfile.str.split(quotestring);
int ofs = 1;
if (sbfile.str.index_of_char(qdc) == 0) {
ofs = 0;
}
//print("datreaddelimited quoted segment count is %d\n",quotedsegs.length);
if (ofs == 0) {
print("ops[%d] opreadtext first segment is quoted\n",opindex);
}
for (int i = 0; i < quotedsegs.length; i++) {
StringBuilder sb = new StringBuilder("");
sb.append(quotedsegs[i]);
if (((i + ofs) % 2) == 1) {
if (hasescape) {
if (!nwl) {
string escapedrcs = (escapestring + rowdelimiter);
uint er = sb.replace(escapedrcs,dds,0);
sb.replace(rowdelimiter,rds,0);
if (er > 0) { sb.replace(dds,escapedrcs,0); }
}
string escapedccs = (escapestring + coldelimiter);
uint ec = sb.replace(escapedccs,dds,0);
int ccc = (int) sb.replace(coldelimiter,cds,0);
if (ec > 0) { sb.replace(dds,escapedccs,0); }
} else {
if (!nwl) { sb.replace(rowdelimiter,rds,0); }
int ccc = (int) sb.replace(coldelimiter,cds,0);
}
if (eqc > 0) { sb.replace(qds,escapedquote,0); }
quotedsegs[i] = sb.str;
}
}
sbfile.erase(0,-1);
for (int i = 0; i < quotedsegs.length; i++) {
if (keepquotes) {
if (((i + ofs) % 2) == 0) {
sbfile.append(quotestring + quotedsegs[i] + quotestring);
} else {
sbfile.append(quotedsegs[i]);
}
} else {
sbfile.append(quotedsegs[i]);
}
}
} else {
if (hasescape) {
if (!nwl) {
string escapedrcs = (escapestring + rowdelimiter);
uint er = sbfile.replace(escapedrcs,dds,0);
sbfile.replace(rowdelimiter,rds,0);
if (er > 0) { sbfile.replace(dds,escapedrcs,0); }
}
string escapedccs = (escapestring + coldelimiter);
uint ec = sbfile.replace(escapedccs,dds,0);
sbfile.replace(coldelimiter,cds,0);
if (ec > 0) { sbfile.replace(dds,escapedccs,0); }
} else {
if (!nwl) { sbfile.replace(rowdelimiter,rds,0); }
sbfile.replace(coldelimiter,cds,0);
}
}
string[] rows = sbfile.str.split(rds);
int firstrow = 0;
if (hasheader) { firstrow = 1; }
if (maxrows > 0) {
maxrows = int.min(maxrows,rows.length);
} else {
maxrows = rows.length;
}
print("ops[%d] opreadtext row count is ............. : %d\n",opindex,maxrows);
if (maxrows <= 0) {
dat.resize(0);
hed.resize(0);
dsz = 0;
return;
}
// get max col count, if rows have varying column counts
for (int i = firstrow; i < maxrows; i++) {
StringBuilder rsb = new StringBuilder("");
rsb.append(rows[i].dup());
uint ccc = rsb.replace(cds,qds);
dsz = int.max((((int) ccc) + 1),dsz); // n delimiters means n+1 columns
}
print("ops[%d] opreadtext unfiltered column count is : %d\n",opindex,dsz);
int[] vfc = {}; // valid source column
if (columns.length > 0) {
foreach (int x in columns) {
if (x < dsz) { vfc += x; }
}
}
if (vfc.length == 0) {
for (int i = 0; i < dsz; i++) { vfc += i; }
} else {
print("ops[%d] opreadtext filtered columns are ..... : %s\n",opindex,printintarray(vfc,2));
}
for (int r = firstrow; r < maxrows; r++) {
string[] cols = rows[r].split(cds);
int rowcolcount = cols.length;
foreach (int c in vfc) {
if (c < rowcolcount) {
dat += cols[c];
} else {
dat += "";
}
}
}
dsz = vfc.length;
if (hasheader) {
hed.resize(0);
int fl = vfc.length;
string[] cols = rows[0].split(cds);
for (int i = 0; i < fl; i++) {
string hs = cols[vfc[i]].dup();
//string o = "";
//int d = i+1; // shift to 1-based
//while (d > 0) {
// int x = ((d - 1) % 26);
// o = "%c%s".printf((x+65),o);
// d = ((d - x) - 1) / 26;
//}
hed += "";
hed += hs;
hed += "STRING";
hed += "";
hed += "";
int c = ((hed.length - 1) / hsz);
hedsetuid(c);
}
} else {
hedinitialize();
}
sbfile.erase(0,-1);
print("ops[%d] opreadtext dsz is ................... : %d\n",opindex,dsz);
print("ops[%d] opreadtext mtp is ................... : %s\n",opindex,mtp);
print("ops[%d] opreadtext msk.length is ............ : %d\n",opindex,msk.length);
int64 tte = GLib.get_monotonic_time();
print("ops[%d] opreadtext from %s took %s\n".printf(opindex,filename,printusecs(tte-tts)));
}
}
dop[] ops;
void main() {
GLib.Intl.setlocale(ALL,"");
mutex = GLib.Mutex();
nthr = int.max(1,((int) GLib.get_num_processors()));
ops += new opreadtext();
int[] lcacols = {2,6,21,24};
ops[0].argnput(0,"FILENAME","./testdata/LCA_Disclosure_Data_FY2020_Q1.csv");
ops[0].argnput(0,"ROWDELIMITER","\n");
ops[0].argnput(0,"COLDELIMITER",",");
ops[0].argnput(0,"QUOTESTRING","\"");
ops[0].argnput(0,"ESCAPESTRING","\\");
ops[0].argnput(0,"HASHEADER","TRUE");
ops[0].argnput(0,"COLUMNS",printintarray(lcacols,0));
ops[0].argnput(0,"MAXROWS","-1");
ops[0].eval();
ops[0].datcachecc();
ops[0].datprintrange("ROWS",0,10);
ops += new opreadtext();
lcacols = {6,21,45};
ops[1].argnput(0,"FILENAME","./testdata/LCA_Disclosure_Data_FY2020_Q2.csv");
ops[1].argnput(0,"ROWDELIMITER","\n");
ops[1].argnput(0,"COLDELIMITER",",");
ops[1].argnput(0,"QUOTESTRING","\"");
ops[1].argnput(0,"ESCAPESTRING","\\");
ops[1].argnput(0,"HASHEADER","TRUE");
ops[1].argnput(0,"COLUMNS",printintarray(lcacols,0));
ops[1].argnput(0,"MAXROWS","-1");
ops[1].eval();
ops[1].datcachecc();
ops[1].datprintrange("ROWS",0,10);
ops += new opreadtext();
lcacols = {3,6,21,18};
ops[2].argnput(0,"FILENAME","./testdata/LCA_Disclosure_Data_FY2020_Q3.csv");
ops[2].argnput(0,"ROWDELIMITER","\n");
ops[2].argnput(0,"COLDELIMITER",",");
ops[2].argnput(0,"QUOTESTRING","\"");
ops[2].argnput(0,"ESCAPESTRING","\\");
ops[2].argnput(0,"HASHEADER","TRUE");
ops[2].argnput(0,"COLUMNS",printintarray(lcacols,0));
ops[2].argnput(0,"MAXROWS","-1");
ops[2].eval();
ops[2].datcachecc();
ops[2].datprintrange("ROWS",0,10);
ops += new opreadtext();
lcacols = {4,6,21,10,11};
ops[3].argnput(0,"FILENAME","./testdata/LCA_Disclosure_Data_FY2020_Q4.csv");
ops[3].argnput(0,"ROWDELIMITER","\n");
ops[3].argnput(0,"COLDELIMITER",",");
ops[3].argnput(0,"QUOTESTRING","\"");
ops[3].argnput(0,"ESCAPESTRING","\\");
ops[3].argnput(0,"HASHEADER","TRUE");
ops[3].argnput(0,"COLUMNS",printintarray(lcacols,0));
ops[3].argnput(0,"MAXROWS","-1");
ops[3].eval();
ops[3].datcachecc();
ops[3].datprintrange("ROWS",0,10);
ops += new opcollate();
ops[4].prpnput("IGNORECASE","FALSE");
ops[4].argnput(0,"FILTERIF","CONTAINS");
ops[4].argnput(0,"FILTERWHAT","EFFECTS");
ops[4].argnput(0,"FILTERIGNORECASE","TRUE");
ops[4].argnput(0,"FILTERHOW","ROWS BY COLUMNS");
ops[4].argnput(0,"WHATCOL","HEADERS");
ops[4].argnput(0,"COLHEADERSBYNAMES","TRUE");
ops[4].argnput(0,"COLHEADERNAMES","JOB_TITLE");
ops[4].srcappendbyindex(0);
ops[4].srcappendbyindex(1);
ops[4].srcappendbyindex(2);
ops[4].srcappendbyindex(3);
ops[4].eval();
ops[4].datcachecc();
ops[4].datprintrange("ROWS",0,10);
ops += new opcollate();
ops[5].prpnput("IGNORECASE","FALSE");
ops[5].argnput(0,"FILTERIF","STARTS WITH");
ops[5].argnput(0,"FILTERWHAT","JOB_TITLE");
ops[5].argnput(0,"FILTERIGNORECASE","TRUE");
ops[5].argnput(0,"FILTERHOW","COLUMNS BY ROW");
ops[5].argnput(0,"WHATROW","HEADER");
ops[5].argnput(0,"FILTERANDOR","OR");
ops[5].argnput(-1,"FILTERIF","ENDS WITH");
ops[5].argnput(-1,"FILTERWHAT","STATE");
ops[5].argnput(-1,"FILTERIGNORECASE","TRUE");
ops[5].argnput(-1,"FILTERHOW","COLUMNS BY ROW");
ops[5].argnput(-1,"WHATROW","HEADER");
ops[5].srcappendbyindex(0);
ops[5].srcappendbyindex(1);
ops[5].srcappendbyindex(2);
ops[5].srcappendbyindex(3);
ops[5].eval();
ops[5].datcachecc();
ops[5].datprintrange("ROWS",0,10);
}
#+RESULTS:
Compilation succeeded - 32 warning(s)
ops[0] opreadtext filename is .............. : ./testdata/LCA_Disclosure_Data_FY2020_Q1.csv
ops[0] opreadtext row delimiter string is .. : \n
ops[0] opreadtext col delimiter string is .. : ,
ops[0] opreadtext quote string is .......... : "
ops[0] opreadtext escape string is ......... : \
ops[0] opreadtext hasheader is ............. : true
ops[0] opreadtext column filter is ......... : {2,6,21,24}
ops[0] opreadtext row count is ............. : 112019
ops[0] opreadtext unfiltered column count is : 95
ops[0] opreadtext filtered columns are ..... : {2,6,21,24}
ops[0] opreadtext dsz is ................... : 4
ops[0] opreadtext mtp is ................... : NONE
ops[0] opreadtext msk.length is ............ : 0
ops[0] opreadtext from ./testdata/LCA_Disclosure_Data_FY2020_Q1.csv took 1.43 s
ops[0].datcachecc took 3.90 ms
| RECEIVED_DATE | JOB_TITLE | EMPLOYER_ADDRESS1 | EMPLOYER_STATE |
| 9/25/2019 | APPLICATION ENGINEER, OMS [15-1199.02] | 5555 DARROW ROAD | OH |
| 9/25/2019 | BI DEVELOPER II | 9170 IRVINE CENTER DRIVE | CA |
| 9/25/2019 | QUALITY ENGINEER | 24800 DENSO DRIVE | MI |
| 9/25/2019 | SOFTWARE DEVELOPER, APPLICATIONS | 3001 W. BIG BEAVER ROAD, SUITE 500 | MI |
| 9/25/2019 | QUALITY ENGINEER LEVEL II | 8033 UNIVERSITY BLVD | IA |
| 9/25/2019 | OPERATION RESEARCH ANALYSTS | 225 CREEKSTONE RIDGE, | GA |
| 9/25/2019 | SOFTWARE DEVELOPER | 801 E CAMPBELL RD | TX |
| 9/25/2019 | .NET LEAD DEVELOPER | 9300 WEST 110TH STREET | KS |
| 9/25/2019 | NETWORK ENGINEER | 5755 NORTH POINT PARKWAY | GA |
| 9/25/2019 | SOFTWARE DEVELOPER (.NET FULL STACK DEVELOPER) | 1081 CAMBRIDGE SQUARE | GA |
| 9/25/2019 | POWER BI ADMINISTRATOR | 622 GEORGES ROAD, | NJ |
ops[0].datprintrc took 32 μs
ops[1] opreadtext filename is .............. : ./testdata/LCA_Disclosure_Data_FY2020_Q2.csv
ops[1] opreadtext row delimiter string is .. : \n
ops[1] opreadtext col delimiter string is .. : ,
ops[1] opreadtext quote string is .......... : "
ops[1] opreadtext escape string is ......... : \
ops[1] opreadtext hasheader is ............. : true
ops[1] opreadtext column filter is ......... : {6,21,45}
ops[1] opreadtext row count is ............. : 157175
ops[1] opreadtext unfiltered column count is : 95
ops[1] opreadtext filtered columns are ..... : {6,21,45}
ops[1] opreadtext dsz is ................... : 3
ops[1] opreadtext mtp is ................... : NONE
ops[1] opreadtext msk.length is ............ : 0
ops[1] opreadtext from ./testdata/LCA_Disclosure_Data_FY2020_Q2.csv took 2.02 s
ops[1].datcachecc took 3.85 ms
| JOB_TITLE | EMPLOYER_ADDRESS1 | AGENT_REPRESENTING_EMPLOYER |
| Senior Systems Analyst JC60 | 211 QUALITY CIRCLE | N |
| Hardware Engineer | 690 E. Middlefield Rd | Y |
| Senior Systems Analyst JC60 | 211 QUALITY CIRCLE | N |
| Senior Systems Analyst JC60 | 211 QUALITY CIRCLE | N |
| Senior Java Developer | 3001 West Big Beaver Rd. | Y |
| Associate Director JC40 | 211 QUALITY CIRCLE | N |
| Manager JC50 | 211 QUALITY CIRCLE | N |
| System Analyst JC65 | 211 QUALITY CIRCLE | N |
| Manager JC50 | 211 QUALITY CIRCLE | N |
| DATASTAGE DEVELOPER | 9466 Black Mountain Road | Y |
| Manager JC50 | 211 QUALITY CIRCLE | N |
ops[1].datprintrc took 35 μs
ops[2] opreadtext filename is .............. : ./testdata/LCA_Disclosure_Data_FY2020_Q3.csv
ops[2] opreadtext row delimiter string is .. : \n
ops[2] opreadtext col delimiter string is .. : ,
ops[2] opreadtext quote string is .......... : "
ops[2] opreadtext escape string is ......... : \
ops[2] opreadtext hasheader is ............. : true
ops[2] opreadtext column filter is ......... : {3,6,21,18}
ops[2] opreadtext row count is ............. : 190751
ops[2] opreadtext unfiltered column count is : 95
ops[2] opreadtext filtered columns are ..... : {3,6,21,18}
ops[2] opreadtext dsz is ................... : 4
ops[2] opreadtext mtp is ................... : NONE
ops[2] opreadtext msk.length is ............ : 0
ops[2] opreadtext from ./testdata/LCA_Disclosure_Data_FY2020_Q3.csv took 2.56 s
ops[2].datcachecc took 5.52 ms
| DECISION_DATE | JOB_TITLE | EMPLOYER_ADDRESS1 | AMENDED_PETITION |
| 4/1/2020 | International Tax Manager | 111 RIDC Park W Dr. | 0 |
| 4/1/2020 | Research Technician | 1300 York Avenue | 0 |
| 4/1/2020 | Tableau Developer | 830 E. Higgins Rd | 1 |
| 4/1/2020 | Scientist | 641 AVENUE OF THE AMERICAS | 1 |
| 4/1/2020 | Senior OBIEE Developer | 4700 Exchange Court | 0 |
| 4/1/2020 | CIS MANAGER 2 | 132 TURNPIKE ROAD | 0 |
| 4/1/2020 | Law Clerk | 150 S. Los Robles Avenue, Suite 720 | 0 |
| 4/1/2020 | CLINICAL FELLOW | 2525 WEST END AVENUE | 0 |
| 4/1/2020 | Application Developer - Consultant | 425 Metro Place North | 1 |
| 4/1/2020 | Professional Services Senior Consultant | 800 Bridge Parkway | 1 |
| 4/1/2020 | Cloud ETL Developer | 8270 Greensboro Drive | 0 |
ops[2].datprintrc took 46 μs
ops[3] opreadtext filename is .............. : ./testdata/LCA_Disclosure_Data_FY2020_Q4.csv
ops[3] opreadtext row delimiter string is .. : \n
ops[3] opreadtext col delimiter string is .. : ,
ops[3] opreadtext quote string is .......... : "
ops[3] opreadtext escape string is ......... : \
ops[3] opreadtext hasheader is ............. : true
ops[3] opreadtext column filter is ......... : {4,6,21,10,11}
ops[3] opreadtext row count is ............. : 117397
ops[3] opreadtext unfiltered column count is : 95
ops[3] opreadtext filtered columns are ..... : {4,6,21,10,11}
ops[3] opreadtext dsz is ................... : 5
ops[3] opreadtext mtp is ................... : NONE
ops[3] opreadtext msk.length is ............ : 0
ops[3] opreadtext from ./testdata/LCA_Disclosure_Data_FY2020_Q4.csv took 1.54 s
ops[3].datcachecc took 4.15 ms
| ORIGINAL_CERT_DATE | JOB_TITLE | EMPLOYER_ADDRESS1 | BEGIN_DATE | END_DATE |
| | Staff Accountant | 10430 S De Anza Blvd | 10/1/2020 | 10/1/2023 |
| | Senior DevOps Engineer - KBGFJG72962-3 | 17095 Via Del Campo | 6/26/2020 | 6/25/2023 |
| | Senior DevOps Engineer - KBGFJG03640-5 | 17095 Via Del Campo | 6/26/2020 | 6/25/2023 |
| | SOFTWARE ENGINEER | 1 Chase Corporate Dr | 8/19/2020 | 8/19/2023 |
| | SALES MANAGER | 2035 LINCOLN HIGHWAY | 7/1/2020 | 6/30/2023 |
| | SOURCING ENGINEER | 2035 LINCOLN HIGHWAY | 10/1/2020 | 9/30/2023 |
| | Computer and Information Systems Managers - KBOEYTEST4310-22 | 200 Plaza Drive | 9/8/2020 | 9/7/2023 |
| | Computer and Information Systems Managers - KBOEYTEST4310-22 | 200 Plaza Drive | 9/8/2020 | 9/7/2023 |
| | Test Lead | 101 Wood Avenue South | 7/3/2020 | 7/2/2023 |
| | IT Project Manager | 101 Wood Avenue South | 7/2/2020 | 7/1/2023 |
| | Software Quality Assurance Engineer and Tester | 101 Wood Avenue South | 10/1/2020 | 9/30/2023 |
ops[3].datprintrc took 50 μs
opcollate initializing parameters...
opcollate initialized.
opcollate evaluating...
opcollate making source packets...
opcollate fetching source op index from source[0] of 4 sources...
opcollate counting headers in op[0]...
hed.length is 20, hsz is 5
opcollate counted 4 headers in op[0].
opcollate copying headers from op 0...
opcollate iterating over sources...
opcollate args are:
| # | FILTERWHAT | FILTERIF | FILTERIGNORECASE | FILTERLEVEL | FILTERHOW | FILTERANDOR | WHATROW | ROWNUMBER | ROWNUMBERS | ROWRANGEFROM | ROWRANGETO | ROWNTH | ROWNTHOFFSET | ROWNTHINRANGE | WHATCOL | COLNUMBER | COLNUMBERS | COLRANGEFROM | COLRANGETO | COLNTH | COLNTHOFFSET | COLNTHINRANGE | COLHEADERIDS | COLHEADERSBYNAMES | COLHEADERNAMES | COLHEADERIGNORECASE |
|---+--------------+---------------+------------------+-------------+----------------------+-------------+---------+-----------+------------+--------------+------------+--------+--------------+---------------+--------------+-----------+------------+--------------+------------+--------+--------------+---------------+--------------+-------------------+----------------+---------------------|
| 0 | (01) EFFECTS | (03) CONTAINS | (05) TRUE | (07) 0 | (09) ROWS BY COLUMNS | (11) AND | (13) | (15) | (17) | (19) | (21) | (23) | (25) | (27) | (29) HEADERS | (31) 0 | (33) | (35) | (37) | (39) | (41) | (43) | (45) | (47) TRUE | (49) JOB_TITLE | (51) |
opcollate asz is 26, aln is 52, arg.length is 52, arg count is 1
opcollate getting dimensions of source[0] op[0]...
opcollate source[0] op[0] has 112018 rows, 4 columns.
opcollate src[0]...
opcollate src[0] processing 1 filters...
opcollate src[0] gathering args from filter 1 of 1 filters...
opcollate src[0] filter 0 "FILTERHOW" is ROWS BY COLUMNS
opcollate src[0] filter 0 is filtering by HEADERS
opcollate src[0] is collecting 2 rows
opcollate src[0] is collecting 4 columns
opcollate getting dimensions of source[1] op[1]...
opcollate source[1] op[1] has 157174 rows, 3 columns.
opcollate src[1]...
opcollate src[1] processing 1 filters...
opcollate src[1] gathering args from filter 1 of 1 filters...
opcollate src[1] filter 0 "FILTERHOW" is ROWS BY COLUMNS
opcollate src[1] filter 0 is filtering by HEADERS
opcollate src[1] is collecting 3 rows
opcollate src[1] is collecting 3 columns
opcollate getting dimensions of source[2] op[2]...
opcollate source[2] op[2] has 190750 rows, 4 columns.
opcollate src[2]...
opcollate src[2] processing 1 filters...
opcollate src[2] gathering args from filter 1 of 1 filters...
opcollate src[2] filter 0 "FILTERHOW" is ROWS BY COLUMNS
opcollate src[2] filter 0 is filtering by HEADERS
opcollate src[2] is collecting 6 rows
opcollate src[2] is collecting 4 columns
opcollate getting dimensions of source[3] op[3]...
opcollate source[3] op[3] has 117396 rows, 5 columns.
opcollate src[3]...
opcollate src[3] processing 1 filters...
opcollate src[3] gathering args from filter 1 of 1 filters...
opcollate src[3] filter 0 "FILTERHOW" is ROWS BY COLUMNS
opcollate src[3] filter 0 is filtering by HEADERS
opcollate src[3] is collecting 2 rows
opcollate src[3] is collecting 5 columns
collated headers are:
{RECEIVED_DATE,JOB_TITLE,EMPLOYER_ADDRESS1,EMPLOYER_STATE,AGENT_REPRESENTING_EMPLOYER,DECISION_DATE,AMENDED_PETITION,ORIGINAL_CERT_DATE,BEGIN_DATE,END_DATE}
opcollate populating dat...
opcollate reconstructing headers...
opcollate complete.
opcollate took 32.47 ms to collate 13 rows 10 cols
ops[4].datcachecc took 191 μs
| RECEIVED_DATE | JOB_TITLE | EMPLOYER_ADDRESS1 | EMPLOYER_STATE | AGENT_REPRESENTING_EMPLOYER | DECISION_DATE | AMENDED_PETITION | ORIGINAL_CERT_DATE | BEGIN_DATE | END_DATE |
| 10/11/2019 | Effects Artist | 1000 Flower Street | CA | | | | | | |
| 3/24/2017 | VISUAL EFFECTS TECHNICAL ARTIST | 3249 S. LA CIENEGA BLVD | CA | | | | | | |
| | VISUAL EFFECTS SUPERVISOR/FLAME ARTIST | 2308 Broadway | | Y | | | | | |
| | Character Effects Artist | 1000 Flower Street | | Y | | | | | |
| | Effects Artist | 1000 Flower Street | | Y | | | | | |
| | Special Effects Video Editor | 8008 Slide Road | | | 4/10/2020 | 0 | | | |
| | Special Effects Video Editor | 8008 Slide Road | | | 4/14/2020 | 0 | | | |
| | Special Effects Video Editor | 8008 Slide Road | | | 4/15/2020 | 0 | | | |
| | Visual Effects Artist | 1338 Old Bayshore Hwy | | | 4/24/2020 | 0 | | | |
| | Special Effects Video Editor | 8008 Slide Road | | | 4/30/2020 | 0 | | | |
| | Visual Effects and Post Producer | 1025 BATTERY STREET | | | 6/2/2020 | 0 | | | |
ops[4].datprintrc took 61 μs