Context Navigation

cmset_op.pro @ 289

Last change on this file since 289 was 262, checked in by pinsard, 17 years ago
corrections of some headers and parameters and keywords case. change of pro2href to replace proidl
Property svn:keywords set to `Id`
File size: 16.0 KB

Rev	Line
[31]	1	;+
[237]	2	;
[133]	3	; @hidden
[31]	4	;
[136]	5	; @file_comments
	6	; Simplified version of CMSET_OP_UNIQ which sorts, and takes the
	7	; "first" value, whatever that may mean.
	8	;
[133]	9	; @todo seb
[237]	10	;
[133]	11	;-
[31]	12	;
[237]	13	FUNCTION cmset_op_uniq, a
[31]	14	;
[133]	15	compile_opt idl2, strictarrsubs
[31]	16	;
[133]	17	if n_elements(a) LE 1 then return, 0L
	18
	19	ii = sort(a) & b = a[ii]
	20	wh = where(b NE shift(b, +1L), ct)
	21	if ct GT 0 then return, ii[wh]
	22
	23	return, 0L
[31]	24	;
[133]	25	end
	26	;+
	27	;
[136]	28	; @file_comments
[133]	29	; Performs an AND, OR, or XOR operation between two sets
	30	;
[136]	31	; Description: SET_OP performs three common operations between two sets. The
	32	; three supported functions of OP are:
[31]	33	;
	34	; OP Meaning
	35	; 'AND' - to find the intersection of A and B;
	36	; 'OR' - to find the union of A and B;
[136]	37	; 'XOR' - to find the those elements who are members of A or B
[31]	38	; but not both;
	39	;
	40	; Sets as defined here are one dimensional arrays composed of
[136]	41	; numeric or string types. Comparisons of equality between elements
[31]	42	; are done using the IDL EQ operator.
	43	;
	44	; The complements of either set can be taken as well, by using the
[163]	45	; NOT1 and NOT2 keywords. For example, it may be desirable to find
[31]	46	; the elements in A but not B, or B but not A (they are different!).
	47	; The following IDL expressions achieve each of those effects:
	48	;
	49	; SET = CMSET_OP(A, 'AND', /NOT2, B) ; A but not B
	50	; SET = CMSET_OP(/NOT1, A, 'AND', B) ; B but not A
	51	;
	52	; Note the distinction between NOT1 and NOT2. NOT1 refers to the
	53	; first set (A) and NOT2 refers to the second (B). Their ordered
	54	; placement in the calling sequence is entirely optional, but the
	55	; above ordering makes the logical meaning explicit.
	56	;
	57	; NOT1 and NOT2 can only be set for the 'AND' operator, and never
[136]	58	; simultaneously. This is because the results of an operation with
[31]	59	; 'OR' or 'XOR' and any combination of NOTs -- or with 'AND' and
	60	; both NOTs -- formally cannot produce a defined result.
	61	;
[136]	62	; The implementation depends on the type of operands. For integer
	63	; types, a fast technique using HISTOGRAM is used. However, this
[31]	64	; algorithm becomes inefficient when the dynamic range in the data
[136]	65	; is large. For those cases, and for other data types, a technique
	66	; based on SORT() is used. Thus the compute time should scale
[31]	67	; roughly as (A+B)ALOG(A+B) or better, rather than (AB) for the
[136]	68	; brute force approach. For large arrays this is a significant
[31]	69	; benefit.
	70	;
[238]	71	; @categories
[157]	72	; Array
[31]	73	;
[136]	74	; @param A {in}{required}
	75	; The two sets to be operated on. A one dimensional array of
	76	; either numeric or string type. A and B must be of the same
	77	; type. Empty sets are permitted, and are either represented
	78	; as an undefined variable, or by setting EMPTY1 or EMPTY2.
[31]	79	;
[136]	80	; @param B {in}{required}
	81	; See A
[133]	82	;
[163]	83	; @param OP0 {in}{required}{type=string}
[136]	84	; a string, the operation to be performed. Must be one of
	85	; 'AND', 'OR' or 'XOR' (lower or mixed case is permitted).
	86	; Other operations will cause an error message to be produced.
[31]	87	;
[136]	88	; @keyword NOT1
	89	; If set and OP is 'AND', then the complement of A (for
	90	; NOT1) or B (for NOT2) will be used in the operation.
	91	; NOT1 and NOT2 cannot be set simultaneously.
[31]	92	;
[136]	93	; @keyword NOT2
	94	; See NOT1
[31]	95	;
[136]	96	; @keyword EMPTY1
	97	; If set, then A (for EMPTY1) or B (for EMPTY2) are
	98	; assumed to be the empty set. The actual values
	99	; passed as A or B are then ignored.
[133]	100	;
[136]	101	; @keyword EMPTY2
	102	; See EMPTY1
[133]	103	;
[136]	104	; @keyword INDEX
[163]	105	; if set, then return a list of indexes instead of the array
[136]	106	; values themselves. The "slower" set operations are always
	107	; performed in this case.
[31]	108	;
[163]	109	; The indexes refer to the combined array [A,B]. To
[136]	110	; clarify, in the following call: I = CMSET_OP(..., /INDEX);
	111	; returned values from 0 to NA-1 refer to A[I], and values
	112	; from NA to NA+NB-1 refer to B[I-NA].
[31]	113	;
[136]	114	; @keyword COUNT
	115	; upon return, the number of elements in the result set.
	116	; This is only important when the result set is the empty
	117	; set, in which case COUNT is set to zero.
[31]	118	;
[136]	119	; @returns
	120	; The resulting set as a one-dimensional array. The set may be
	121	; represented by either an array of data values (default), or an
[163]	122	; array of indexes (if INDEX is set). Duplicate elements, if any,
[136]	123	; are removed, and element order may not be preserved.
[31]	124	;
[136]	125	; The empty set is represented as a return value of -1L, and COUNT
	126	; is set to zero. Note that the only way to recognize the empty set
	127	; is to examine COUNT.
[31]	128	;
	129	; SEE ALSO:
	130	;
[136]	131	; SET_UTILS.PRO by RSI
[31]	132	;
[238]	133	; @history
[237]	134	; Written, CM, 23 Feb 2000
[31]	135	; Added empty set capability, CM, 25 Feb 2000
	136	; Documentation clarification, CM 02 Mar 2000
	137	; Incompatible but more consistent reworking of EMPTY keywords, CM,
	138	; 04 Mar 2000
	139	; Minor documentation clarifications, CM, 26 Mar 2000
	140	; Corrected bug in empty_arg special case, CM 06 Apr 2000
	141	; Add INDEX keyword, CM 31 Jul 2000
	142	; Clarify INDEX keyword documentation, CM 06 Sep 2000
	143	; Made INDEX keyword always force SLOW_SET_OP, CM 06 Sep 2000
	144	; Added CMSET_OP_UNIQ, and ability to select FIRST_UNIQUE or
	145	; LAST_UNIQUE values, CM, 18 Sep 2000
	146	; Removed FIRST_UNIQUE and LAST_UNIQUE, and streamlined
	147	; CMSET_OP_UNIQ until problems with SORT can be understood, CM, 20
	148	; Sep 2000 (thanks to Ben Tupper)
	149	; Still trying to get documentation of INDEX and NOT right, CM, 28
	150	; Sep 2000 (no code changes)
[84]	151	; Correct bug for AND case, when input sets A and B each only have
	152	; one unique value, and the values are equal. CM, 04 Mar 2004
	153	; (thanks to James B. jbattat at cfa dot harvard dot edu)
	154	; Add support for the cases where the input data types are mixed,
	155	; but still compatible; also, attempt to return the same data
	156	; type that was passed in; CM, 05 Feb 2005
	157	; Fix bug in type checking (thanks to "marit"), CM, 10 Dec 2005
	158	; Work around a stupidity in the built-in IDL HISTOGRAM routine,
	159	; which tries to "help" you by restricting the MIN/MAX to the
	160	; range of the input variable (thanks to Will Maddox), CM, 16 Jan 2006
[31]	161	;
[133]	162	; Author: Craig B. Markwardt, NASA/GSFC Code 662, Greenbelt, MD 20770
[136]	163	; craigm\@lheamail.gsfc.nasa.gov
[31]	164	;
[238]	165	; @version
[237]	166	; $Id$
[133]	167	;
[136]	168	; @examples
	169	; Utility function, similar to UNIQ, but allowing choice of taking
[133]	170	; first or last unique element, or non-unique elements.
	171	; Unfortunately this doesn't work because of implementation dependent
	172	; versions of the SORT() function.
	173	;
[31]	174	; function cmset_op_uniq, a, first=first, non=non, count=ct, sort=sortit
	175	; if n_elements(a) LE 1 then return, 0L
	176	; sh = (2Lkeyword_set(first)-1L)(-2L*keyword_set(non)+1)
	177	;
	178	; if keyword_set(sortit) then begin
	179	; ;; Sort it manually
[114]	180	; ii = sort(a) & b = a[ii]
[31]	181	; if keyword_set(non) then wh = where(b EQ shift(b, sh), ct) $
	182	; else wh = where(b NE shift(b, sh), ct)
[114]	183	; if ct GT 0 then return, ii[wh]
[31]	184	; endif else begin
	185	; ;; Use the user's values directly
	186	; if keyword_set(non) then wh = where(a EQ shift(a, sh), ct) $
	187	; else wh = where(a NE shift(a, sh), ct)
	188	; if ct GT 0 then return, wh
	189	; endelse
	190	;
	191	; if keyword_set(first) then return, 0L else return, n_elements(a)-1
	192	; end
	193
[133]	194	; Simplified version of CMSET_OP_UNIQ which sorts, and takes the
	195	; "first" value, whatever that may mean.
[114]	196	;
[133]	197	;-
[237]	198	;
[262]	199	FUNCTION cmset_op, a, op0, b, NOT1=not1, NOT2=not2, COUNT=count, $
	200	EMPTY1=empty1, EMPTY2=empty2, MAXARRAY=ma, INDEX=index
[114]	201	;
	202	compile_opt idl2, strictarrsubs
	203	;
[31]	204
	205	on_error, 2 ;; return on error
	206	count = 0L
	207	index0 = -1L
	208	;; Histogram technique is used for array sizes < 32,000 elements
	209	if n_elements(ma) EQ 0 then ma = 32L*1024L
	210
	211	;; Check the number of arguments
	212	if n_params() LT 3 then begin
	213	ARG_ERR:
	214	message, 'USAGE: SET = CMSET_OP(A, OP, B [, COUNT=ct])', /info
	215	message, ' KEYWORDS: /NOT1, /NOT2, /EMPTY1, /EMPTY2, INDEX', /info
	216	return, -1L
	217	endif
	218	if n_elements(op0) EQ 0 then goto, ARG_ERR
	219	kind = keyword_set(index)
	220	fst = 1L
	221	if keyword_set(last) then fst = 0L
	222	if keyword_set(first) then fst = 1L
	223
	224	;; Check the operation
	225	sz = size(op0)
[114]	226	if sz[sz[0]+1] NE 7 then begin
[31]	227	OP_ERR:
	228	message, "ERROR: OP must be 'AND', 'OR' or 'XOR'"
	229	return, -1L
	230	endif
	231	op = strupcase(op0)
	232	if op NE 'AND' AND op NE 'OR' AND op NE 'XOR' then goto, OP_ERR
	233
	234	;; Check NOT1 and NOT2
	235	if keyword_set(not1) AND keyword_set(not2) then begin
	236	message, "ERROR: NOT1 and NOT2 cannot be set simultaneously"
	237	return, -1L
	238	endif
	239	if (keyword_set(not1) OR keyword_set(not2)) AND $
	240	(op EQ 'OR' OR op EQ 'XOR') then begin
	241	message, "ERROR: NOT1 and NOT2 cannot be set with 'OR' or 'XOR'"
	242	return, -1L
	243	endif
	244
	245	;; Special cases for empty set
	246	n1 = n_elements(a) & n2 = n_elements(b)
	247	if keyword_set(empty1) then n1 = 0L
	248	if keyword_set(empty2) then n2 = 0L
	249	if n1 EQ 0 OR n2 EQ 0 then begin
	250	;; Eliminate duplicates
	251	if n1 GT 0 then a1 = cmset_op_uniq(a)
	252	if n2 GT 0 then b1 = cmset_op_uniq(b)
	253	n1 = n_elements(a1) < n1 & n2 = n_elements(b1) < n2
[136]	254	case op of
[31]	255	'OR': if n1 EQ 0 then goto, RET_A1 else goto, RET_B1
	256	'XOR': if n1 EQ 0 then goto, RET_B1 else goto, RET_A1
	257	'AND': begin
	258	if keyword_set(not1) AND n1 EQ 0 then goto, RET_B1
	259	if keyword_set(not2) AND n2 EQ 0 then goto, RET_A1
	260	return, -1L
	261	end
	262	endcase
	263	return, -1L
	264	RET_A1:
	265	count = n1
	266	if kind then begin
	267	if count GT 0 then return, a1 else return, -1L
	268	endif
[114]	269	if count GT 0 then return, a[a1] else return, -1L
[31]	270	RET_B1:
	271	count = n2
	272	if kind then begin
	273	if count GT 0 then return, b1+n1 else return, -1L
[136]	274	endif
[114]	275	if count GT 0 then return, b[b1] else return, -1L
[31]	276	endif
	277
[84]	278	;; Allow data to have different types, but they must be at least of
	279	;; the same "base" type. That is, you can't combine a number with a
	280	;; string, etc.
	281	;; basetype 0:undefined 1:real number 6:complex number 7:string
	282	;; 8:structure 10:pointer 11:object
	283
	284	;; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
	285	basetype = [0, 1, 1, 1, 1, 1, 6, 7, 8, 6,10,11, 1, 1, 1, 1]
	286
[31]	287	;; Check types of operands
[114]	288	sz1 = size(a) & tp1 = sz1[sz1[0]+1]
	289	sz2 = size(b) & tp2 = sz2[sz2[0]+1]
[84]	290	if tp1 LT 0 OR tp1 GE 16 OR tp2 LT 0 OR tp2 GE 16 then begin
	291	message, 'ERROR: unrecognized data types for operands'
	292	return, -1
	293	endif
[114]	294	if basetype[tp1] NE basetype[tp2] then begin
[31]	295	TYPE1_ERR:
	296	message, 'ERROR: both A and B must be of the same type'
	297	return, -1L
	298	endif
[84]	299	if tp1 EQ 8 OR tp1 EQ 10 OR tp1 EQ 11 then begin
[31]	300	TYPE2_ERR:
	301	message, 'ERROR: operands must be a numeric or string type'
	302	return, -1L
	303	endif
	304
	305	;; Now use two different kinds of algorithms: a slower but more
	306	;; general algorithm for generic types, and the histogram technique
	307	;; for integer types. Even for integer types, if there is too much
	308	;; dynamic range, then the slow method is used.
	309
	310	if tp1 GE 4 AND tp1 LE 9 then begin
	311	;; String and real types, or large int arrays
	312	SLOW_SET_OP:
[136]	313	case op of
[31]	314	'OR': begin
	315	uu = [a,b] ;; OR is simple; just take unique values
	316	index0 = cmset_op_uniq(uu)
	317	count = n_elements(index0)
	318	if kind then return, index0
[114]	319	return, uu[index0]
[31]	320	end
	321
	322	'XOR': begin
	323	;; Make ordered list of set union
	324	ai = cmset_op_uniq(a) & na = n_elements(ai)
	325	bi = cmset_op_uniq(b) & nb = n_elements(bi)
	326	ui = [ai, bi+n1]
[114]	327	uu = [a,b] & uu = uu[ui] ;; Raw union...
	328	us = sort(uu) & uu = uu[us] ;; ...and sort
	329	if kind then ui = ui[temporary(us)] else ui = 0
[31]	330
	331	;; Values in one set only will not have duplicates
	332	wh1 = where(uu NE shift(uu, -1), count1)
	333	if count1 EQ 0 then return, -1L
[114]	334	wh = where(wh1[1:*]-wh1 EQ 1, count)
	335	if wh1[0] EQ 0 then begin
[31]	336	if count GT 0 then wh = [-1L, wh] else wh = [-1L]
	337	count = n_elements(wh)
	338	endif
	339	if count EQ 0 then return, -1
[114]	340	if kind then return, ui[wh1[wh+1]]
	341	return, uu[wh1[wh+1]]
[31]	342	end
	343
	344	'AND': begin
	345	;; Make ordered list of set union
	346	ai = cmset_op_uniq(a) & na = n_elements(ai)
	347	bi = cmset_op_uniq(b) & nb = n_elements(bi)
	348	ui = [ai, bi+n1]
[114]	349	uu = [a,b] & uu = uu[ui] ;; Raw union...
	350	us = sort(uu) & uu = uu[us] ;; ...and sort
	351	if kind then ui = ui[us] else ui = 0
[31]	352
	353	if NOT keyword_set(not1) AND NOT keyword_set(not2) then begin
[84]	354
	355	;; Special case: if there are one in each set, and
	356	;; they are equal, then the SHIFT() technique below
	357	;; fails. Do this one by hand.
[114]	358	if na EQ 1 AND nb EQ 1 AND uu[0] EQ uu[1] then begin
[84]	359	count = 1L
	360	if kind then return, 0L
[114]	361	return, [uu[0]]
[84]	362	endif
	363
[31]	364	;; If neither "NOT" is set, then find duplicates
	365	us = 0L ;; Save memory
	366	wh = where(uu EQ shift(uu, -1L), count) ;; Find non-unique
	367	if count EQ 0 then return, -1L
	368	;; This should always select the element from A
	369	;; rather than B (the smaller of the two)
[114]	370	if kind then return, (ui[wh] < ui[wh+1])
	371	return, uu[wh]
[31]	372	endif
	373
	374	;; For "NOT" cases, we need to identify by set
	375	ii = make_array(na+nb, value=1b)
[114]	376	if keyword_set(not1) then ii[0:na-1] = 0
	377	if keyword_set(not2) then ii[na:*] = 0
	378	ii = ii[temporary(us)]
[31]	379
	380	;; Remove any duplicates
	381	wh1 = where(uu EQ shift(uu, -1L), count1) ;; Find non-unique
[114]	382	if count1 GT 0 then ii[wh1, wh1+1] = 0
[31]	383	;; Remainder is the desired set
	384	wh = where(ii, count)
	385	if count EQ 0 then return, -1L
[114]	386	if kind then return, ui[wh]
	387	return, uu[wh]
[31]	388	end
	389
	390	endcase
	391	return, -1L ;; DEFAULT CASE
	392	endif else begin
	393
	394	;; INDEX keyword forces the "slow" operation
	395	if kind then goto, SLOW_SET_OP
	396
	397	;; Integer types - use histogram technique if the data range
	398	;; is small enough, otherwise use the "slow" technique above
	399	min1 = min(a, max=max1) & min2 = min(b, max=max2)
	400	minn = min1 < min2 & maxx = max1 > max2
	401	nbins = maxx-minn+1
[114]	402	if (maxx-minn) GT floor(ma[0]) then goto, SLOW_SET_OP
[31]	403
[84]	404	;; Work around a stupidity in the built-in IDL HISTOGRAM routine
	405	if (tp1 EQ 2 OR tp2 EQ 2) AND (minn LT -32768 OR maxx GT 32767) then $
	406	goto, SLOW_SET_OP
	407
[31]	408	;; Following operations create a histogram of the integer values.
	409	ha = histogram(a, min=minn, max=maxx) < 1
	410	hb = histogram(b, min=minn, max=maxx) < 1
	411
	412	;; Compute NOT cases
[136]	413	if keyword_set(not1) then ha = 1b - ha
[31]	414	if keyword_set(not2) then hb = 1b - hb
[136]	415	case op of
[31]	416	;; Boolean operations
[136]	417	'AND': mask = temporary(ha) AND temporary(hb)
[31]	418	'OR': mask = temporary(ha) OR temporary(hb)
	419	'XOR': mask = temporary(ha) XOR temporary(hb)
	420	endcase
	421
	422	wh = where(temporary(mask), count)
	423	if count EQ 0 then return, -1L
[136]	424
[84]	425	result = temporary(wh+minn)
	426	if tp1 NE tp2 then return, result
[114]	427	szr = size(result) & tpr = szr[szr[0]+1]
[31]	428
[84]	429	;; Cast to the original type if necessary
	430	if tpr NE tp1 then begin
	431	fresult = make_array(n_elements(result), type=tp1)
[114]	432	fresult[0] = temporary(result)
[84]	433	result = temporary(fresult)
	434	endif
	435
	436	return, result
	437
[31]	438	endelse
	439
	440	return, -1L ;; DEFAULT CASE
	441	end
	442
	443	; Here is how I did the INDEX stuff with fast histogramming. It
	444	; works, but is complicated, so I forced it to go to SLOW_SET_OP.
	445	; ha = histogram(a, min=minn, max=maxx, reverse=ra) < 1
[114]	446	; rr = ra[0:nbins] & mask = rr NE rr[1:] & ra = ra[rr]mask-1L+mask
[31]	447	; hb = histogram(b, min=minn, max=maxx, reverse=rb) < 1
[114]	448	; rr = rb[0:nbins] & mask = rr NE rr[1:] & rb = rb[rr]mask-1L+mask
[31]	449	; ... AND/OR/XOR NOT masking here ...
[114]	450	; ra = ra[wh] & rb = rb[wh]
[31]	451	; return, ra(ra GE 0) + (rb+n1)(ra LT 0) ;; is last 'ra' right?
	452

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format