- qpscanner/cfcs/cfregex.cfc
- master
- 19 KB
- 552
1<!--- cfregex v0.1.003-qp | (c) Peter Boughton | License: GPLv3 | Website: https://www.sorcerersisle.com/software/cfregex --->
2<cfcomponent output=false >
3
4
<cffunction name="init" returntype="any" output=false>
	<!---
		Constructor.
		Registers the supported mode names with their bit values, sets the
		default mode value, then hands all received arguments to compile()
		and returns whatever compile() returns.
		The function removes itself from This before doing anything else.
	--->
	<cfscript>
		var ModeFlags = {};

		StructDelete( This , 'init' );

		// Name -> bit value. parseModes() combines these with BitOr and
		// compilePattern() passes the combined value to Pattern.compile().
		ModeFlags.UNIX_LINES       = 1;
		ModeFlags.CASE_INSENSITIVE = 2;
		ModeFlags.COMMENTS         = 4;
		ModeFlags.MULTILINE        = 8;
		ModeFlags.DOTALL           = 32;
		ModeFlags.UNICODE_CASE     = 64;
		ModeFlags.CANON_EQ         = 128;

		Variables.Modes        = ModeFlags;
		Variables.DefaultModes = 0;

		return This.compile( ArgumentCollection = Arguments );
	</cfscript>
</cffunction>
22
23
24 <!---
25 \\\ INTERNAL \\\
26 --->
27
<cffunction name="parseModes" returntype="Numeric" output="false" access="private">
	<!---
		Converts a comma-delimited list of modes into a single numeric flag value.

		ModeList           : list whose items are either numeric flag values or
		                     names present in Variables.Modes.
		IgnoreInvalidModes : when true, unrecognised items are skipped;
		                     otherwise they raise cfRegex.Compile.InvalidMode.

		Returns the BitOr of every recognised item (0 when the list is empty).

		Items are trimmed before lookup, so "MULTILINE, DOTALL" is accepted.
		Items that are empty after trimming are skipped, in line with how
		CFML lists already ignore empty elements.
	--->
	<cfargument name="ModeList"           type="String"  required="true" />
	<cfargument name="IgnoreInvalidModes" type="Boolean" default="false"/>
	<cfset var CurrentMode = ""/>
	<cfset var ResultMode = 0/>

	<cfloop index="CurrentMode" list="#Arguments.ModeList#">

		<!--- Whitespace around a list item is not part of the mode name. --->
		<cfset CurrentMode = Trim(CurrentMode) />

		<cfif NOT Len(CurrentMode)>
			<cfcontinue />
		</cfif>

		<cfif isNumeric(CurrentMode)>
			<cfset ResultMode = BitOr( ResultMode , CurrentMode )/>

		<cfelseif StructKeyExists( Variables.Modes , CurrentMode )>
			<cfset ResultMode = BitOr( ResultMode , Variables.Modes[CurrentMode] )/>

		<cfelseif NOT Arguments.IgnoreInvalidModes>
			<cfthrow
				message = "Invalid Mode!"
				detail  = "Mode [#CurrentMode#] is not supported."
				type    = "cfRegex.Compile.InvalidMode"
			/>

		</cfif>

	</cfloop>

	<cfreturn ResultMode />
</cffunction>
55
56
<cffunction name="compilePattern" returntype="void" output="false" access="private">
	<!---
		Stores the pattern text and the parsed mode flags in the Variables
		scope, then compiles the pattern with java.util.regex.Pattern and
		stores the compiled object as Variables.PatternObject.
	--->
	<cfargument name="Pattern" type="String" required="true" />
	<cfargument name="Modes"   type="String" required="true" />

	<cfscript>
		var PatternClass = createObject( "java" , "java.util.regex.Pattern" );

		Variables.PatternText = Arguments.Pattern;
		Variables.ActiveModes = parseModes( Arguments.Modes );

		Variables.PatternObject = PatternClass.compile
			( Arguments.Pattern
			, Variables.ActiveModes
			);
	</cfscript>
</cffunction>
69
70
<cffunction name="buildMatchInfo" returntype="Struct" output="false" access="private">
	<!---
		Builds a struct describing the current match of a Matcher.

		Matcher    : a java.util.regex.Matcher positioned on a match.
		PosOffset  : when supplied, Pos/Len are added for the whole match and
		             each Groups entry becomes a {Pos,Len,Match} struct.
		             Pos is PosOffset plus the Matcher's start index.
		GroupNames : array or comma-delimited list of names; when non-empty,
		             NamedGroups maps each name to the Groups entry at the
		             same index (limited to the shorter of the two).

		Returns a struct with Match and Groups, plus Pos, Len and NamedGroups
		when the corresponding arguments were given.

		Group indexes are cast to a Java int before every Matcher call.
		Matcher has String overloads of group/start/end, so an uncast CF
		number can be dispatched to the named-group variant.
	--->
	<cfargument name="Matcher"    type="any"     required="true" />
	<cfargument name="PosOffset"  type="Numeric" optional />
	<cfargument name="GroupNames" type="any"     optional />

	<cfset var MatchInfo =
		{ Match  = Matcher.group()
		, Groups = []
		} />
	<cfset var HasPos     = StructKeyExists(Arguments,'PosOffset') />
	<cfset var CurGroup   = 0 />
	<cfset var GroupIndex = 0 />
	<cfset var i          = 0 />

	<cfif HasPos>
		<cfset MatchInfo.Pos = Arguments.PosOffset+Matcher.start() />
		<cfset MatchInfo.Len = Matcher.end()-Matcher.start() />
	</cfif>

	<cfloop index="CurGroup" from=1 to=#Matcher.groupCount()#>
		<cfset GroupIndex = JavaCast('int',CurGroup) />
		<cfif HasPos>
			<cfset MatchInfo.Groups[CurGroup] =
				{ Pos   = Arguments.PosOffset+Matcher.start(GroupIndex)
				, Len   = Matcher.end(GroupIndex)-Matcher.start(GroupIndex)
				, Match = Matcher.group(GroupIndex)
				} />
		<cfelse>
			<cfset MatchInfo.Groups[CurGroup] = Matcher.group(GroupIndex) />
		</cfif>
	</cfloop>

	<cfif StructKeyExists(Arguments,'GroupNames')>
		<cfif isSimpleValue(Arguments.GroupNames)>
			<cfset Arguments.GroupNames = ListToArray(Arguments.GroupNames) />
		</cfif>
		<cfif ArrayLen(Arguments.GroupNames)>
			<cfset MatchInfo.NamedGroups = {} />
			<cfloop index="i" from="1" to="#Min(ArrayLen(Arguments.GroupNames),ArrayLen(MatchInfo.Groups))#">
				<cfset MatchInfo.NamedGroups[ Arguments.GroupNames[i] ] = MatchInfo.Groups[i] />
			</cfloop>
		</cfif>
	</cfif>

	<cfreturn MatchInfo />
</cffunction>
114
115
116 <!---
117 /// INTERNAL ///
118 --->
119
120
<cffunction name="compile" returntype="cfRegex" output="false" access="public" action>
	<!---
		Compiles the given pattern with the given modes (defaulting to
		Variables.DefaultModes) and returns this component instance.
		The function removes itself from This before compiling.
	--->
	<cfargument name="Pattern" type="String" required="true" />
	<cfargument name="Modes"   type="String" default="#Variables.DefaultModes#" />

	<cfscript>
		StructDelete( This , 'compile' );

		compilePattern( ArgumentCollection = Arguments );

		return This;
	</cfscript>
</cffunction>
130
131 <!---
132 \\\ EXTERNAL \\\
133 --->
134
<cffunction name="find" returntype="Array" output="false" access="public" action>
	<!---
		Finds matches of the compiled pattern in Text.

		Text       : the text to search.
		Start      : 1-based position to start searching from; values below 1
		             are treated as 1.
		Limit      : stop after this many results; 0 means no limit.
		ReturnType : pos  - array of match start positions
		             sub  - array of {pos=[...],len=[...]} structs, where the
		                    first entry is the whole match and the rest are
		                    the capture groups
		             info - array of structs built by buildMatchInfo

		Positions are relative to the original Text, not to the portion
		searched after Start is applied.

		Throws "Unknown returntype" for any other ReturnType.

		Group indexes are cast to a Java int before calling start/end.
		Matcher has String overloads of those methods, so an uncast CF
		number can be dispatched to the named-group variant.
	--->
	<cfargument name="Text"       type="String"  required="true" />
	<cfargument name="Start"      type="Numeric" default=1 />
	<cfargument name="Limit"      type="Numeric" default=0 />
	<cfargument name="ReturnType" type="String"  default="pos" />

	<cfif NOT ListFindNoCase('pos,sub,info',Arguments.ReturnType)>
		<cfthrow message="Unknown returntype" />
	</cfif>

	<cfset var Offset = Max(1,Arguments.Start) />
	<cfif Offset GT 1>
		<cfset Arguments.Text = mid(Arguments.Text,Offset,Len(Arguments.Text)) />
	</cfif>

	<cfset var Matcher    = Variables.PatternObject.Matcher(Arguments.Text) />
	<cfset var Results    = [] />
	<cfset var CurMatch   = "" />
	<cfset var CurGroup   = 0 />
	<cfset var GroupIndex = 0 />

	<cfloop condition="Matcher.find()">
		<cfswitch expression=#LCase(Arguments.ReturnType)#>
			<cfcase value="pos">
				<cfset CurMatch = Offset+Matcher.start() />
			</cfcase>
			<cfcase value="sub">
				<cfset CurMatch =
					{ pos = [Offset+Matcher.start()]
					, len = [Matcher.end()-Matcher.start()]
					} />
				<cfloop index="CurGroup" from=1 to=#Matcher.groupCount()#>
					<cfset GroupIndex = JavaCast('int',CurGroup) />
					<cfset ArrayAppend(CurMatch.pos,Offset+Matcher.start(GroupIndex)) />
					<cfset ArrayAppend(CurMatch.len,Matcher.end(GroupIndex)-Matcher.start(GroupIndex)) />
				</cfloop>
			</cfcase>
			<cfcase value="info">
				<cfset CurMatch = buildMatchInfo(Matcher,Offset) />
			</cfcase>
		</cfswitch>
		<cfset ArrayAppend( Results , CurMatch ) />

		<!--- With Limit=0 this never matches, so all matches are returned. --->
		<cfif ArrayLen(Results) EQ Arguments.Limit>
			<cfbreak />
		</cfif>
	</cfloop>

	<cfreturn Results />
</cffunction>
181
182
<cffunction name="match" returntype="Array" output="false" access="public" action>
	<!---
		Returns the matches of the compiled pattern in Text.

		Text         : the text to search.
		Start        : when supplied and non-zero, searching begins at this
		               1-based position (the text is sliced with mid()).
		Limit        : stop after this many results; 0 means no limit.
		ReturnType   : match       - array of matched strings
		               groups      - array of arrays of captured groups
		               namedgroups - array of structs keyed by GroupNames
		               full        - array of structs from buildMatchInfo
		GroupNames   : array or comma-delimited list of group names.
		Callback     : function called for every match with the
		               buildMatchInfo fields plus Data; the match is only
		               included when it returns true.
		CallbackData : struct passed to Callback as Data (defaults to {}).

		Throws "Unknown returntype" for any other ReturnType.
	--->
	<cfargument name="Text"         type="String"  required="true" />
	<cfargument name="Start"        type="Numeric" optional />
	<cfargument name="Limit"        type="Numeric" default=0 />
	<cfargument name="ReturnType"   type="String"  default="match" hint="match|groups|namedgroups|full" />
	<cfargument name="GroupNames"   type="any"     default="" hint="Required if returnType=NamedGroup." />
	<cfargument name="Callback"     type="any"     optional hint="Function called to determine if a match is included in results." />
	<cfargument name="CallbackData" type="Struct"  optional hint="Extra data which is passed in to callback function." />

	<cfif NOT ListFindNoCase('match,groups,namedgroups,full',Arguments.ReturnType)>
		<cfthrow message="Unknown returntype" />
	</cfif>

	<cfset var Offset = 1 />
	<cfif StructKeyExists(Arguments,'Start') AND Arguments.Start>
		<cfset Arguments.Text = mid(Arguments.Text,Arguments.Start,Len(Arguments.Text)) />
		<!---
			The sliced text begins at original position Start, so a match at
			Matcher index 0 sits at position Start. The offset added to the
			0-based Matcher index must therefore be Start itself. Using
			Start+1 here reported every callback Pos one character too far.
		--->
		<cfset Offset = Arguments.Start />
	</cfif>

	<cfset var Matcher  = Variables.PatternObject.Matcher(Arguments.Text) />
	<cfset var Results  = [] />
	<cfset var CurMatch = "" />
	<cfset var CurGroup = 0 />

	<cfif StructKeyExists(Arguments,'GroupNames') AND isSimpleValue(Arguments.GroupNames)>
		<cfset Arguments.GroupNames = ListToArray(Arguments.GroupNames) />
	</cfif>

	<cfloop condition="Matcher.find()">

		<cfif StructKeyExists(Arguments,'Callback')>
			<cfif NOT StructKeyExists(Arguments,'CallbackData')>
				<cfset Arguments.CallbackData = {} />
			</cfif>
			<!--- A false return from the callback excludes this match. --->
			<cfif NOT Arguments.Callback( ArgumentCollection=buildMatchInfo(Matcher,Offset,Arguments.GroupNames) , Data=Arguments.CallbackData )>
				<cfcontinue />
			</cfif>
		</cfif>

		<cfswitch expression=#Arguments.ReturnType#>
			<cfcase value="match">
				<cfset CurMatch = Matcher.Group() />
			</cfcase>
			<cfcase value="groups">
				<cfset CurMatch = [] />
				<cfloop index="CurGroup" from=1 to=#Matcher.groupCount()#>
					<cfset CurMatch[CurGroup] = Matcher.group(JavaCast('int',CurGroup)) />
				</cfloop>
			</cfcase>
			<cfcase value="namedgroups">
				<cfset CurMatch = {} />
				<cfloop index="CurGroup" from=1 to=#Matcher.groupCount()#>
					<cfset CurMatch[Arguments.GroupNames[CurGroup]] = Matcher.group(JavaCast('int',CurGroup)) />
				</cfloop>
			</cfcase>
			<cfcase value="full">
				<cfset CurMatch = buildMatchInfo(Matcher=Matcher,GroupNames=Arguments.GroupNames) />
			</cfcase>
		</cfswitch>

		<cfset ArrayAppend( Results , CurMatch ) />

		<!--- With Limit=0 this never matches, so all matches are returned. --->
		<cfif ArrayLen(Results) EQ Arguments.Limit>
			<cfbreak />
		</cfif>
	</cfloop>

	<cfreturn Results />
</cffunction>
250
251
<cffunction name="matches" returntype="any" output="false" access="public" action>
	<!---
		Tests Text against the compiled pattern.

		ReturnType selects the test:
		  exact   - the whole text must match (boolean)
		  partial - a match exists anywhere in the text (boolean)
		  start   - a match exists at the start of the text (boolean)
		  end     - the last match found finishes at the end of the text (boolean)
		  count   - the number of matches found (numeric)

		When ReturnType is not passed, it is derived from the extra named
		arguments Exact, Count, At and Partial, falling back to 'exact'.

		Throws cfRegex.Match.InvalidArgument.ReturnType for any other value.
	--->
	<cfargument name="Text"       type="String" required="true" />
	<cfargument name="ReturnType" type="String" optional hint="exact,partial,start,end,count" />

	<cfset var Matcher  = Variables.PatternObject.Matcher(Arguments.Text) />
	<cfset var Tally    = 0 />
	<cfset var FinalEnd = -1 />

	<cfif StructKeyExists(Arguments,'ReturnType')>
		<cfset Arguments.ReturnType = LCase(Arguments.ReturnType) />

	<!--- INFO: Only the two declared arguments are present, so skip the extra checks. --->
	<cfelseif StructCount(Arguments) EQ 2>
		<cfset Arguments.ReturnType = 'exact' />

	<cfelseif StructKeyExists(Arguments,'Exact') AND Arguments.Exact>
		<cfset Arguments.ReturnType = 'exact' />

	<cfelseif StructKeyExists(Arguments,'Count') AND Arguments.Count>
		<cfset Arguments.ReturnType = 'count' />

	<cfelseif StructKeyExists(Arguments,'at') AND Arguments.At EQ 'anywhere'>
		<cfset Arguments.ReturnType = 'partial' />

	<cfelseif StructKeyExists(Arguments,'at')>
		<cfset Arguments.ReturnType = LCase(Arguments.At) />

	<cfelseif StructKeyExists(Arguments,'Partial') AND Arguments.Partial>
		<cfset Arguments.ReturnType = 'partial' />

	<cfelse>
		<cfset Arguments.ReturnType = 'exact' />
	</cfif>

	<cfswitch expression="#Arguments.ReturnType#">
		<cfcase value="exact">
			<cfreturn Matcher.matches() />
		</cfcase>
		<cfcase value="partial">
			<cfreturn Matcher.find() />
		</cfcase>
		<cfcase value="start">
			<cfreturn Matcher.lookingAt() />
		</cfcase>
		<cfcase value="end">
			<!--- Walk every match and remember where the last one finished. --->
			<cfloop condition="Matcher.find()">
				<cfset FinalEnd = Matcher.end() />
			</cfloop>
			<cfreturn (FinalEnd EQ Len(Arguments.Text)) />
		</cfcase>
		<cfcase value="count">
			<cfloop condition="Matcher.find()">
				<cfset Tally++ />
			</cfloop>
			<cfreturn Tally />
		</cfcase>
		<cfdefaultcase>
			<cfthrow
				message = "Invalid ReturnType '#Arguments.ReturnType#' for matches"
				type    = "cfRegex.Match.InvalidArgument.ReturnType"
			/>
		</cfdefaultcase>
	</cfswitch>
</cffunction>
318
319
<cffunction name="escape" returntype="String" output="false" access="public" action>
	<!---
		Returns the pattern text escaped for literal use, caching the result
		in the Variables scope per ReturnType.

		ReturnType : regex - backslash-escape regex metacharacters
		             class - de-duplicate characters, escape the characters
		                     that are special inside a character class, and
		                     write tab/LF/CR as \t, \n and \r

		When the COMMENTS mode is active, '#' and space are also escaped.

		Throws cfRegex.Escape.InvalidArgument.ReturnType for other values.

		The second argument of Java's String.replaceAll is a replacement
		template in which a backslash escapes the next character. Producing
		a literal backslash therefore needs '\\'. The previous '\t', '\n'
		and '\r' replacements emitted the bare letters t, n and r.
	--->
	<cfargument name="ReturnType" type="String" default="REGEX" hint="regex|class" />

	<cfif NOT ListFind('regex,class',LCase(Arguments.ReturnType))>
		<cfthrow
			message = "Invalid Argument ReturnType, received [#Arguments.ReturnType#]"
			detail  = "ReturnType value must be one of 'regex' OR 'class'."
			type    = "cfRegex.Escape.InvalidArgument.ReturnType"
		/>
	</cfif>

	<cfif NOT StructKeyExists(Variables,'Escaped#Arguments.ReturnType#')>
		<cfif Arguments.ReturnType EQ 'regex'>
			<cfset Variables.EscapedRegex = Variables.PatternText.replaceAll('[$^*()+\[\]{}.?\\|]','\\$0') />
		<cfelse>
			<cfset Variables.EscapedClass = Variables.PatternText
				.replaceAll('(.)(?=.*?\1)','')
				.replaceAll('(^\^|[\\\-\[\]])','\\$0')
				.replaceAll(chr(9),'\\t')
				.replaceAll(chr(10),'\\n')
				.replaceAll(chr(13),'\\r')
				/>
		</cfif>
		<cfif BitAnd(Variables.ActiveModes,Variables.Modes['COMMENTS']) >
			<cfset Variables['Escaped#Arguments.ReturnType#'] = Variables['Escaped#Arguments.ReturnType#'].replaceAll('##| ','\\$0') />
		</cfif>
	</cfif>

	<cfreturn Variables['Escaped#Arguments.ReturnType#'] />
</cffunction>
347
348
<cffunction name="quote" returntype="String" output="false" access="public" action>
	<!---
		Returns the pattern text passed through java.util.regex.Pattern.quote().
		The result is computed on first use and cached as Variables.Quoted.
	--->
	<cfscript>
		if ( StructKeyExists( Variables , 'Quoted' ) )
		{
			return Variables.Quoted;
		}

		Variables.Quoted = createObject( "java" , "java.util.regex.Pattern" )
			.quote( Variables.PatternText );

		return Variables.Quoted;
	</cfscript>
</cffunction>
355
356
<cffunction name="replace" returntype="String" output="false" access="public" action>
	<!---
		Replaces matches of the compiled pattern in Text.

		Text         : the text to operate on.
		Replacement  : a string, a function, or an array of strings/functions
		               that is cycled through one entry per match. A named
		               Callback argument, if passed, overrides Replacement.
		Start        : when supplied and non-zero, this many leading
		               characters are left untouched and searching begins
		               after them.
		Limit        : stop after this many replacements; 0 means no limit.
		GroupNames   : passed on to buildMatchInfo for function replacements.
		CallbackData : struct passed to functions as Data (defaults to {}).

		Functions receive the buildMatchInfo fields plus Data, and their
		return value is used as the replacement text. All replacement text
		is handed to Matcher.appendReplacement.

		Throws cfRegex.Replace.MissingArgument when neither Replacement nor
		Callback is provided.
	--->
	<cfargument name="Text"         type="String"  required="true" />
	<cfargument name="Replacement"  type="Any"     optional hint="String,Array,Function"/>
	<cfargument name="Start"        type="Numeric" optional />
	<cfargument name="Limit"        type="Numeric" default=0 />
	<cfargument name="GroupNames"   type="any"     default="" hint="Passed into Callback function if provided" />
	<cfargument name="CallbackData" type="Struct"  optional hint="Extra data which is passed in to callback function." />

	<cfset var Untouched = "" />
	<cfset var Offset    = 1 />
	<cfset var Handler   = "" />
	<cfset var NewText   = "" />
	<cfset var DoneCount = 0 />
	<cfset var CyclePos  = 1 />

	<cfif StructKeyExists(Arguments,'Callback')>
		<cfset Arguments.Replacement = Arguments.Callback />
	<cfelseif NOT StructKeyExists(Arguments,'Replacement')>
		<cfthrow
			message = "Missing Argument Replacement"
			type    = "cfRegex.Replace.MissingArgument"
		/>
	</cfif>

	<cfif StructKeyExists(Arguments,'Start') AND Arguments.Start>
		<!--- Keep the first Start characters aside and search only the remainder. --->
		<cfset Untouched      = Left(Arguments.Text,Arguments.Start) />
		<cfset Offset         = Arguments.Start+1 />
		<cfset Arguments.Text = Mid(Arguments.Text,Arguments.Start+1,Len(Arguments.Text)) />
	</cfif>

	<cfif NOT StructKeyExists(Arguments,'CallbackData')>
		<cfset Arguments.CallbackData = {} />
	</cfif>

	<cfset var Matcher = Variables.PatternObject.Matcher( Arguments.Text ) />
	<cfset var Results = createObject("java","java.lang.StringBuffer").init(Untouched) />

	<cfloop condition="Matcher.find()">

		<!--- Select what handles this match: the single value, or the current array entry. --->
		<cfif isArray(Arguments.Replacement)>
			<cfset Handler = Arguments.Replacement[CyclePos] />
		<cfelse>
			<cfset Handler = Arguments.Replacement />
		</cfif>

		<!--- Simple values are used directly; anything else is called as a function. --->
		<cfif isSimpleValue(Handler)>
			<cfset NewText = Handler />
		<cfelse>
			<cfset NewText = Handler( ArgumentCollection=buildMatchInfo(Matcher,Offset,Arguments.GroupNames) , Data = Arguments.CallbackData ) />
		</cfif>

		<cfset Matcher.appendReplacement( Results , NewText ) />

		<!--- Move to the next array entry, wrapping back to the first. --->
		<cfif isArray(Arguments.Replacement)>
			<cfset CyclePos = CyclePos + 1 />
			<cfif CyclePos GT ArrayLen(Arguments.Replacement)>
				<cfset CyclePos = 1 />
			</cfif>
		</cfif>

		<cfset DoneCount = DoneCount + 1 />
		<cfif DoneCount EQ Arguments.Limit>
			<cfbreak />
		</cfif>

	</cfloop>

	<cfset Matcher.appendTail(Results) />

	<cfreturn Results.toString() />
</cffunction>
431
432
<cffunction name="split" returntype="Array" output="false" access="public" action>
	<!---
		Splits Text around matches of the compiled pattern.

		Text         : the text to split.
		Start        : when supplied and non-zero, this many leading
		               characters are excluded from matching and prepended
		               to the first result element.
		Limit        : maximum number of splits (so at most Limit+1
		               elements); 0 means no limit.
		GroupNames   : passed on to buildMatchInfo for the callback.
		Callback     : function called for each match with the
		               buildMatchInfo fields plus Data. A true return splits
		               at the match; a false return keeps the matched text
		               in the current element.
		CallbackData : struct passed to Callback as Data (defaults to {}).

		Without a Callback the work is delegated to Pattern.split().
	--->
	<cfargument name="Text"         type="String"  required="true" />
	<cfargument name="Start"        type="Numeric" optional />
	<cfargument name="Limit"        type="Numeric" default=0 hint="The maximum number of times a split is made (i.e. limit+1=max array size)"/>
	<cfargument name="GroupNames"   type="any"     default="" hint="Passed into Callback function if provided" />
	<cfargument name="Callback"     type="any"     optional />
	<cfargument name="CallbackData" type="Struct"  optional hint="Extra data which is passed in to callback function." />

	<cfset var Offset = 1 />
	<!---
		Prefix is always declared locally and tracked with an explicit flag.
		isDefined('Prefix') searches every scope, so a same-named URL or Form
		variable could otherwise be prepended to the result.
	--->
	<cfset var Prefix    = "" />
	<cfset var HasPrefix = false />
	<cfset var Matcher   = "" />
	<cfset var Results   = "" />
	<cfset var TextPos   = 1 />
	<cfset var ArrayPos  = 1 />

	<cfif StructKeyExists(Arguments,'Start') AND Arguments.Start >
		<cfset Prefix         = Left(Arguments.Text,Arguments.Start) />
		<cfset HasPrefix      = true />
		<cfset Offset         = 1+Arguments.Start />
		<cfset Arguments.Text = Mid(Arguments.Text,Arguments.Start+1,Len(Arguments.Text)) />
	</cfif>

	<cfif StructKeyExists(Arguments,'Callback')>
		<cfset Matcher = Variables.PatternObject.Matcher( Arguments.Text )/>
		<cfset Results = [''] />
		<cfif NOT StructKeyExists(Arguments,'CallbackData')>
			<cfset Arguments.CallbackData = {} />
		</cfif>

		<!---
			Plain find() resumes after the previous match and steps past a
			zero-length match. The earlier find(TextPos-1) restarted at the
			end of the previous match, so a zero-length match was found again
			at the same index and the loop never terminated.
		--->
		<cfloop condition="Matcher.find()">

			<cfif Arguments.Callback( ArgumentCollection=buildMatchInfo(Matcher,Offset,Arguments.GroupNames) , Data=Arguments.CallbackData )>

				<!--- Split here: keep the text before the match, drop the match itself. --->
				<cfset Results[ArrayPos] &= mid(Arguments.Text,TextPos,Matcher.start()+1-TextPos) />
				<cfset TextPos = Matcher.end()+1 />

				<cfset ArrayPos++ />
				<cfset Results[ArrayPos] = '' />

				<cfif Arguments.Limit AND ArrayLen(Results) GT Arguments.Limit>
					<cfbreak />
				</cfif>
			<cfelse>
				<!--- No split: keep everything up to and including the match. --->
				<cfset Results[ArrayPos] &= mid(Arguments.Text,TextPos,Matcher.end()+1-TextPos) />
				<cfset TextPos = Matcher.end()+1 />
			</cfif>

		</cfloop>

		<!--- Whatever follows the last processed match belongs to the final element. --->
		<cfset Results[ArrayPos] &= mid(Arguments.Text,TextPos,len(Arguments.Text)) />

	<cfelse>
		<cfif Arguments.Limit>
			<!---
				NOTE:
				For java.util.regex, limit is array length.
				For cfregex, limit is number of times the action occurs.
				Therefore, must add one...
			--->
			<cfset Results = Variables.PatternObject.split(Arguments.Text,Arguments.Limit+1) />
		<cfelse>
			<cfset Results = Variables.PatternObject.split(Arguments.Text) />
		</cfif>
	</cfif>

	<cfif HasPrefix AND ArrayLen(Results)>
		<cfset Results[1] = Prefix & Results[1] />
	</cfif>

	<cfreturn Results />
</cffunction>
499
500 <!---
501 /// EXTERNAL ///
502 --->
503
504
505
506 <!---
507 CALLBACK SAMPLES
508
509 A callback function can be used with the following functions:
510 .replace
511 .match
512 .split
513
514 A callback is called each time a match is found, and allows for
515 conditional behaviour to be executed at this point,
516 to change how the function behaves towards the match.
517
518 A Replace Callback determines what text to use for replacement.
519 A Match Callback determines whether to include or exclude the match in results.
520 A Split Callback determines whether to split or not at the match.
521
522 The callbacks are identical except for returntype.
523 (For Replace it returns text, for everything else, it returns a boolean.)
524
525 See https://docs.sorcerersisle.com/cfregex/Callbacks
526
527 <cffunction name="ReplaceCallback" returntype="String" output="false">
528 <cfargument name="Pos" type="Numeric" required="true" hint="The start position of the match." />
529 <cfargument name="Len" type="Numeric" required="true" hint="The length of the match." />
530 <cfargument name="Match" type="String" required="true" hint="The text of the match." />
531 <cfargument name="Groups" type="Array" required="true" hint="Array of group information." />
532 <cfargument name="NamedGroups" type="Struct" optional hint="Struct of named group information." />
533 <cfargument name="Data" type="Struct" optional hint="Struct containing passed-in data." />
534
535 <cfreturn 'replacement text' />
536 </cffunction>
537
538
539 <cffunction name="BooleanCallback" returntype="Boolean" output="false">
540 <cfargument name="Pos" type="Numeric" required="true" hint="The start position of the match." />
541 <cfargument name="Len" type="Numeric" required="true" hint="The length of the match." />
542 <cfargument name="Match" type="String" required="true" hint="The text of the match." />
543 <cfargument name="Groups" type="Array" required="true" hint="Array of group information." />
544 <cfargument name="NamedGroups" type="Struct" optional hint="Struct of named group information." />
545 <cfargument name="Data" type="Struct" optional hint="Struct containing passed-in data." />
546
547 <cfreturn true />
548 </cffunction>
549
550 --->
551
552
553</cfcomponent>