1 /*
2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.util.regex;
27
import java.util.Arrays;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
38
39 /**
40 * An engine that performs match operations on a {@linkplain
41 * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}.
42 *
43 * <p> A matcher is created from a pattern by invoking the pattern's {@link
44 * Pattern#matcher matcher} method. Once created, a matcher can be used to
45 * perform three different kinds of match operations:
46 *
47 * <ul>
48 *
49 * <li><p> The {@link #matches matches} method attempts to match the entire
50 * input sequence against the pattern. </p></li>
51 *
52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the
53 * input sequence, starting at the beginning, against the pattern. </p></li>
54 *
55 * <li><p> The {@link #find find} method scans the input sequence looking
56 * for the next subsequence that matches the pattern. </p></li>
57 *
58 * </ul>
59 *
60 * <p> Each of these methods returns a boolean indicating success or failure.
61 * More information about a successful match can be obtained by querying the
62 * state of the matcher.
63 *
64 * <p> A matcher finds matches in a subset of its input called the
65 * <i>region</i>. By default, the region contains all of the matcher's input.
66 * The region can be modified via the {@link #region(int, int) region} method
67 * and queried via the {@link #regionStart() regionStart} and {@link
68 * #regionEnd() regionEnd} methods. The way that the region boundaries interact
69 * with some pattern constructs can be changed. See {@link
70 * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link
71 * #useTransparentBounds(boolean) useTransparentBounds} for more details.
72 *
73 * <p> This class also defines methods for replacing matched subsequences with
74 * new strings whose contents can, if desired, be computed from the match
75 * result. The {@link #appendReplacement appendReplacement} and {@link
76 * #appendTail appendTail} methods can be used in tandem in order to collect
77 * the result into an existing string buffer or string builder. Alternatively,
78 * the more convenient {@link #replaceAll replaceAll} method can be used to
79 * create a string in which every matching subsequence in the input sequence
80 * is replaced.
81 *
82 * <p> The explicit state of a matcher includes the start and end indices of
83 * the most recent successful match. It also includes the start and end
84 * indices of the input subsequence captured by each <a
85 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
86 * count of such subsequences. As a convenience, methods are also provided for
87 * returning these captured subsequences in string form.
88 *
89 * <p> The explicit state of a matcher is initially undefined; attempting to
90 * query any part of it before a successful match will cause an {@link
91 * IllegalStateException} to be thrown. The explicit state of a matcher is
92 * recomputed by every match operation.
93 *
94 * <p> The implicit state of a matcher includes the input character sequence as
95 * well as the <i>append position</i>, which is initially zero and is updated
96 * by the {@link #appendReplacement appendReplacement} method.
97 *
98 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
99 * method or, if a new input sequence is desired, its {@link
100 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a
101 * matcher discards its explicit state information and sets the append position
102 * to zero.
103 *
104 * <p> Instances of this class are not safe for use by multiple concurrent
105 * threads. </p>
106 *
107 *
108 * @author Mike McCloskey
109 * @author Mark Reinhold
110 * @author JSR-51 Expert Group
111 * @since 1.4
112 * @spec JSR-51
113 */
114
115 public final class Matcher implements MatchResult {
116
117 /**
118 * The Pattern object that created this Matcher.
119 */
120 Pattern parentPattern;
121
122 /**
123 * The storage used by groups. They may contain invalid values if
124 * a group was skipped during the matching.
125 */
126 int[] groups;
127
128 /**
129 * The range within the sequence that is to be matched. Anchors
130 * will match at these "hard" boundaries. Changing the region
131 * changes these values.
132 */
133 int from, to;
134
135 /**
136 * Lookbehind uses this value to ensure that the subexpression
137 * match ends at the point where the lookbehind was encountered.
138 */
139 int lookbehindTo;
140
141 /**
142 * The original string being matched.
143 */
144 CharSequence text;
145
146 /**
147 * Matcher state used by the last node. NOANCHOR is used when a
148 * match does not have to consume all of the input. ENDANCHOR is
149 * the mode used for matching all the input.
150 */
151 static final int ENDANCHOR = 1;
152 static final int NOANCHOR = 0;
153 int acceptMode = NOANCHOR;
154
155 /**
156 * The range of string that last matched the pattern. If the last
157 * match failed then first is -1; last initially holds 0 then it
158 * holds the index of the end of the last match (which is where the
159 * next search starts).
160 */
161 int first = -1, last = 0;
162
163 /**
164 * The end index of what matched in the last match operation.
165 */
166 int oldLast = -1;
167
168 /**
169 * The index of the last position appended in a substitution.
170 */
171 int lastAppendPosition = 0;
172
173 /**
174 * Storage used by nodes to tell what repetition they are on in
175 * a pattern, and where groups begin. The nodes themselves are stateless,
176 * so they rely on this field to hold state during a match.
177 */
178 int[] locals;
179
180 /**
181 * Storage used by top greedy Loop node to store a specific hash set to
182 * keep the beginning index of the failed repetition match. The nodes
183 * themselves are stateless, so they rely on this field to hold state
184 * during a match.
185 */
186 IntHashSet[] localsPos;
187
188 /**
189 * Boolean indicating whether or not more input could change
190 * the results of the last match.
191 *
192 * If hitEnd is true, and a match was found, then more input
193 * might cause a different match to be found.
194 * If hitEnd is true and a match was not found, then more
195 * input could cause a match to be found.
196 * If hitEnd is false and a match was found, then more input
197 * will not change the match.
198 * If hitEnd is false and a match was not found, then more
199 * input will not cause a match to be found.
200 */
201 boolean hitEnd;
202
203 /**
204 * Boolean indicating whether or not more input could change
205 * a positive match into a negative one.
206 *
207 * If requireEnd is true, and a match was found, then more
208 * input could cause the match to be lost.
209 * If requireEnd is false and a match was found, then more
210 * input might change the match but the match won't be lost.
211 * If a match was not found, then requireEnd has no meaning.
212 */
213 boolean requireEnd;
214
215 /**
216 * If transparentBounds is true then the boundaries of this
217 * matcher's region are transparent to lookahead, lookbehind,
218 * and boundary matching constructs that try to see beyond them.
219 */
220 boolean transparentBounds = false;
221
222 /**
223 * If anchoringBounds is true then the boundaries of this
224 * matcher's region match anchors such as ^ and $.
225 */
226 boolean anchoringBounds = true;
227
228 /**
229 * Number of times this matcher's state has been modified
230 */
231 int modCount;
232
/**
 * Package-private no-argument constructor. It leaves every field at
 * its declared initial value; no pattern or input is attached.
 */
Matcher() {
    // Nothing to set up here: every field keeps its declared initial value.
    super();
}
238
239 /**
240 * All matchers have the state used by Pattern during a match.
241 */
242 Matcher(Pattern parent, CharSequence text) {
243 this.parentPattern = parent;
244 this.text = text;
245
246 // Allocate state storage
247 if (parent.capturingGroupCount > 10) {
248 groups = new int[parent.capturingGroupCount * 2];
249 } else {
250 groups = new int[20];
251 }
252 locals = new int[parent.localCount];
253 localsPos = new IntHashSet[parent.localTCNCount];
254
255 // Put fields into initial states
256 reset();
257 }
258
259 /**
260 * Returns the pattern that is interpreted by this matcher.
261 *
262 * @return The pattern for which this matcher was created
263 */
264 public Pattern pattern() {
265 return parentPattern;
266 }
267
268 /**
269 * Returns the match state of this matcher as a {@link MatchResult}.
270 * The result is unaffected by subsequent operations performed upon this
271 * matcher.
272 *
273 * @return a {@code MatchResult} with the state of this matcher
274 * @since 1.5
275 */
276 public MatchResult toMatchResult() {
277 return toMatchResult(text.toString());
278 }
279
280 private MatchResult toMatchResult(String text) {
281 return new ImmutableMatchResult(this.first,
282 this.last,
283 groupCount(),
284 this.groups.clone(),
285 text);
286 }
287
288 private static class ImmutableMatchResult implements MatchResult {
289 private final int first;
290 private final int last;
291 private final int[] groups;
292 private final int groupCount;
293 private final String text;
294
295 ImmutableMatchResult(int first, int last, int groupCount,
296 int groups[], String text)
297 {
298 this.first = first;
299 this.last = last;
300 this.groupCount = groupCount;
301 this.groups = groups;
302 this.text = text;
303 }
304
305 @Override
306 public int start() {
307 checkMatch();
308 return first;
309 }
310
311 @Override
312 public int start(int group) {
313 checkMatch();
314 if (group < 0 || group > groupCount)
315 throw new IndexOutOfBoundsException("No group " + group);
316 return groups[group * 2];
317 }
318
319 @Override
320 public int end() {
321 checkMatch();
322 return last;
323 }
324
325 @Override
326 public int end(int group) {
327 checkMatch();
328 if (group < 0 || group > groupCount)
329 throw new IndexOutOfBoundsException("No group " + group);
330 return groups[group * 2 + 1];
331 }
332
333 @Override
334 public int groupCount() {
335 return groupCount;
336 }
337
338 @Override
339 public String group() {
340 checkMatch();
341 return group(0);
342 }
343
344 @Override
345 public String group(int group) {
346 checkMatch();
347 if (group < 0 || group > groupCount)
348 throw new IndexOutOfBoundsException("No group " + group);
349 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
350 return null;
351 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
352 }
353
354 private void checkMatch() {
355 if (first < 0)
356 throw new IllegalStateException("No match found");
357
358 }
359 }
360
361 /**
 * Changes the {@code Pattern} that this {@code Matcher} uses to
 * find matches.
364 *
365 * <p> This method causes this matcher to lose information
366 * about the groups of the last match that occurred. The
367 * matcher's position in the input is maintained and its
368 * last append position is unaffected.</p>
369 *
370 * @param newPattern
371 * The new pattern used by this matcher
372 * @return This matcher
373 * @throws IllegalArgumentException
374 * If newPattern is {@code null}
375 * @since 1.5
376 */
377 public Matcher usePattern(Pattern newPattern) {
378 if (newPattern == null)
379 throw new IllegalArgumentException("Pattern cannot be null");
380 parentPattern = newPattern;
381
382 // Reallocate state storage
383 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
384 groups = new int[parentGroupCount * 2];
385 locals = new int[newPattern.localCount];
386 for (int i = 0; i < groups.length; i++)
387 groups[i] = -1;
388 for (int i = 0; i < locals.length; i++)
389 locals[i] = -1;
390 localsPos = new IntHashSet[parentPattern.localTCNCount];
391 modCount++;
392 return this;
393 }
394
395 /**
396 * Resets this matcher.
397 *
398 * <p> Resetting a matcher discards all of its explicit state information
399 * and sets its append position to zero. The matcher's region is set to the
400 * default region, which is its entire character sequence. The anchoring
401 * and transparency of this matcher's region boundaries are unaffected.
402 *
403 * @return This matcher
404 */
405 public Matcher reset() {
406 first = -1;
407 last = 0;
408 oldLast = -1;
409 for(int i=0; i<groups.length; i++)
410 groups[i] = -1;
411 for(int i=0; i<locals.length; i++)
412 locals[i] = -1;
413 for (int i = 0; i < localsPos.length; i++) {
414 if (localsPos[i] != null)
415 localsPos[i].clear();
416 }
417 lastAppendPosition = 0;
418 from = 0;
419 to = getTextLength();
420 modCount++;
421 return this;
422 }
423
424 /**
425 * Resets this matcher with a new input sequence.
426 *
427 * <p> Resetting a matcher discards all of its explicit state information
428 * and sets its append position to zero. The matcher's region is set to
429 * the default region, which is its entire character sequence. The
430 * anchoring and transparency of this matcher's region boundaries are
431 * unaffected.
432 *
433 * @param input
434 * The new input character sequence
435 *
436 * @return This matcher
437 */
438 public Matcher reset(CharSequence input) {
439 text = input;
440 return reset();
441 }
442
443 /**
444 * Returns the start index of the previous match.
445 *
446 * @return The index of the first character matched
447 *
448 * @throws IllegalStateException
449 * If no match has yet been attempted,
450 * or if the previous match operation failed
451 */
452 public int start() {
453 if (first < 0)
454 throw new IllegalStateException("No match available");
455 return first;
456 }
457
458 /**
459 * Returns the start index of the subsequence captured by the given group
460 * during the previous match operation.
461 *
462 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
463 * to right, starting at one. Group zero denotes the entire pattern, so
464 * the expression <i>m.</i>{@code start(0)} is equivalent to
465 * <i>m.</i>{@code start()}. </p>
466 *
467 * @param group
468 * The index of a capturing group in this matcher's pattern
469 *
470 * @return The index of the first character captured by the group,
471 * or {@code -1} if the match was successful but the group
472 * itself did not match anything
473 *
474 * @throws IllegalStateException
475 * If no match has yet been attempted,
476 * or if the previous match operation failed
477 *
478 * @throws IndexOutOfBoundsException
479 * If there is no capturing group in the pattern
480 * with the given index
481 */
482 public int start(int group) {
483 if (first < 0)
484 throw new IllegalStateException("No match available");
485 if (group < 0 || group > groupCount())
486 throw new IndexOutOfBoundsException("No group " + group);
487 return groups[group * 2];
488 }
489
490 /**
491 * Returns the start index of the subsequence captured by the given
492 * <a href="Pattern.html#groupname">named-capturing group</a> during the
493 * previous match operation.
494 *
495 * @param name
496 * The name of a named-capturing group in this matcher's pattern
497 *
498 * @return The index of the first character captured by the group,
499 * or {@code -1} if the match was successful but the group
500 * itself did not match anything
501 *
502 * @throws IllegalStateException
503 * If no match has yet been attempted,
504 * or if the previous match operation failed
505 *
506 * @throws IllegalArgumentException
507 * If there is no capturing group in the pattern
508 * with the given name
509 * @since 1.8
510 */
511 public int start(String name) {
512 return groups[getMatchedGroupIndex(name) * 2];
513 }
514
515 /**
516 * Returns the offset after the last character matched.
517 *
518 * @return The offset after the last character matched
519 *
520 * @throws IllegalStateException
521 * If no match has yet been attempted,
522 * or if the previous match operation failed
523 */
524 public int end() {
525 if (first < 0)
526 throw new IllegalStateException("No match available");
527 return last;
528 }
529
530 /**
531 * Returns the offset after the last character of the subsequence
532 * captured by the given group during the previous match operation.
533 *
534 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
535 * to right, starting at one. Group zero denotes the entire pattern, so
536 * the expression <i>m.</i>{@code end(0)} is equivalent to
537 * <i>m.</i>{@code end()}. </p>
538 *
539 * @param group
540 * The index of a capturing group in this matcher's pattern
541 *
542 * @return The offset after the last character captured by the group,
543 * or {@code -1} if the match was successful
544 * but the group itself did not match anything
545 *
546 * @throws IllegalStateException
547 * If no match has yet been attempted,
548 * or if the previous match operation failed
549 *
550 * @throws IndexOutOfBoundsException
551 * If there is no capturing group in the pattern
552 * with the given index
553 */
554 public int end(int group) {
555 if (first < 0)
556 throw new IllegalStateException("No match available");
557 if (group < 0 || group > groupCount())
558 throw new IndexOutOfBoundsException("No group " + group);
559 return groups[group * 2 + 1];
560 }
561
562 /**
563 * Returns the offset after the last character of the subsequence
564 * captured by the given <a href="Pattern.html#groupname">named-capturing
565 * group</a> during the previous match operation.
566 *
567 * @param name
568 * The name of a named-capturing group in this matcher's pattern
569 *
570 * @return The offset after the last character captured by the group,
571 * or {@code -1} if the match was successful
572 * but the group itself did not match anything
573 *
574 * @throws IllegalStateException
575 * If no match has yet been attempted,
576 * or if the previous match operation failed
577 *
578 * @throws IllegalArgumentException
579 * If there is no capturing group in the pattern
580 * with the given name
581 * @since 1.8
582 */
583 public int end(String name) {
584 return groups[getMatchedGroupIndex(name) * 2 + 1];
585 }
586
587 /**
588 * Returns the input subsequence matched by the previous match.
589 *
590 * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
591 * the expressions <i>m.</i>{@code group()} and
592 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i>
593 * {@code end())} are equivalent. </p>
594 *
595 * <p> Note that some patterns, for example {@code a*}, match the empty
596 * string. This method will return the empty string when the pattern
597 * successfully matches the empty string in the input. </p>
598 *
599 * @return The (possibly empty) subsequence matched by the previous match,
600 * in string form
601 *
602 * @throws IllegalStateException
603 * If no match has yet been attempted,
604 * or if the previous match operation failed
605 */
606 public String group() {
607 return group(0);
608 }
609
610 /**
611 * Returns the input subsequence captured by the given group during the
612 * previous match operation.
613 *
614 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
615 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and
616 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
617 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))}
618 * are equivalent. </p>
619 *
620 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
621 * to right, starting at one. Group zero denotes the entire pattern, so
622 * the expression {@code m.group(0)} is equivalent to {@code m.group()}.
623 * </p>
624 *
625 * <p> If the match was successful but the group specified failed to match
626 * any part of the input sequence, then {@code null} is returned. Note
627 * that some groups, for example {@code (a*)}, match the empty string.
628 * This method will return the empty string when such a group successfully
629 * matches the empty string in the input. </p>
630 *
631 * @param group
632 * The index of a capturing group in this matcher's pattern
633 *
634 * @return The (possibly empty) subsequence captured by the group
635 * during the previous match, or {@code null} if the group
636 * failed to match part of the input
637 *
638 * @throws IllegalStateException
639 * If no match has yet been attempted,
640 * or if the previous match operation failed
641 *
642 * @throws IndexOutOfBoundsException
643 * If there is no capturing group in the pattern
644 * with the given index
645 */
646 public String group(int group) {
647 if (first < 0)
648 throw new IllegalStateException("No match found");
649 if (group < 0 || group > groupCount())
650 throw new IndexOutOfBoundsException("No group " + group);
651 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
652 return null;
653 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
654 }
655
656 /**
657 * Returns the input subsequence captured by the given
658 * <a href="Pattern.html#groupname">named-capturing group</a> during the
659 * previous match operation.
660 *
661 * <p> If the match was successful but the group specified failed to match
662 * any part of the input sequence, then {@code null} is returned. Note
663 * that some groups, for example {@code (a*)}, match the empty string.
664 * This method will return the empty string when such a group successfully
665 * matches the empty string in the input. </p>
666 *
667 * @param name
668 * The name of a named-capturing group in this matcher's pattern
669 *
670 * @return The (possibly empty) subsequence captured by the named group
671 * during the previous match, or {@code null} if the group
672 * failed to match part of the input
673 *
674 * @throws IllegalStateException
675 * If no match has yet been attempted,
676 * or if the previous match operation failed
677 *
678 * @throws IllegalArgumentException
679 * If there is no capturing group in the pattern
680 * with the given name
681 * @since 1.7
682 */
683 public String group(String name) {
684 int group = getMatchedGroupIndex(name);
685 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
686 return null;
687 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
688 }
689
690 /**
691 * Returns the number of capturing groups in this matcher's pattern.
692 *
693 * <p> Group zero denotes the entire pattern by convention. It is not
694 * included in this count.
695 *
696 * <p> Any non-negative integer smaller than or equal to the value
697 * returned by this method is guaranteed to be a valid group index for
698 * this matcher. </p>
699 *
700 * @return The number of capturing groups in this matcher's pattern
701 */
702 public int groupCount() {
703 return parentPattern.capturingGroupCount - 1;
704 }
705
706 /**
707 * Attempts to match the entire region against the pattern.
708 *
709 * <p> If the match succeeds then more information can be obtained via the
710 * {@code start}, {@code end}, and {@code group} methods. </p>
711 *
712 * @return {@code true} if, and only if, the entire region sequence
713 * matches this matcher's pattern
714 */
715 public boolean matches() {
716 return match(from, ENDANCHOR);
717 }
718
719 /**
720 * Attempts to find the next subsequence of the input sequence that matches
721 * the pattern.
722 *
723 * <p> This method starts at the beginning of this matcher's region, or, if
724 * a previous invocation of the method was successful and the matcher has
725 * not since been reset, at the first character not matched by the previous
726 * match.
727 *
728 * <p> If the match succeeds then more information can be obtained via the
729 * {@code start}, {@code end}, and {@code group} methods. </p>
730 *
731 * @return {@code true} if, and only if, a subsequence of the input
732 * sequence matches this matcher's pattern
733 */
734 public boolean find() {
735 int nextSearchIndex = last;
736 if (nextSearchIndex == first)
737 nextSearchIndex++;
738
739 // If next search starts before region, start it at region
740 if (nextSearchIndex < from)
741 nextSearchIndex = from;
742
743 // If next search starts beyond region then it fails
744 if (nextSearchIndex > to) {
745 for (int i = 0; i < groups.length; i++)
746 groups[i] = -1;
747 return false;
748 }
749 return search(nextSearchIndex);
750 }
751
752 /**
753 * Resets this matcher and then attempts to find the next subsequence of
754 * the input sequence that matches the pattern, starting at the specified
755 * index.
756 *
757 * <p> If the match succeeds then more information can be obtained via the
758 * {@code start}, {@code end}, and {@code group} methods, and subsequent
759 * invocations of the {@link #find()} method will start at the first
760 * character not matched by this match. </p>
761 *
762 * @param start the index to start searching for a match
763 * @throws IndexOutOfBoundsException
764 * If start is less than zero or if start is greater than the
765 * length of the input sequence.
766 *
767 * @return {@code true} if, and only if, a subsequence of the input
768 * sequence starting at the given index matches this matcher's
769 * pattern
770 */
771 public boolean find(int start) {
772 int limit = getTextLength();
773 if ((start < 0) || (start > limit))
774 throw new IndexOutOfBoundsException("Illegal start index");
775 reset();
776 return search(start);
777 }
778
779 /**
780 * Attempts to match the input sequence, starting at the beginning of the
781 * region, against the pattern.
782 *
783 * <p> Like the {@link #matches matches} method, this method always starts
784 * at the beginning of the region; unlike that method, it does not
785 * require that the entire region be matched.
786 *
787 * <p> If the match succeeds then more information can be obtained via the
788 * {@code start}, {@code end}, and {@code group} methods. </p>
789 *
790 * @return {@code true} if, and only if, a prefix of the input
791 * sequence matches this matcher's pattern
792 */
793 public boolean lookingAt() {
794 return match(from, NOANCHOR);
795 }
796
797 /**
798 * Returns a literal replacement {@code String} for the specified
799 * {@code String}.
800 *
801 * This method produces a {@code String} that will work
802 * as a literal replacement {@code s} in the
803 * {@code appendReplacement} method of the {@link Matcher} class.
804 * The {@code String} produced will match the sequence of characters
 * in {@code s} treated as a literal sequence. Backslashes ('\') and
 * dollar signs ('$') will be given no special meaning.
807 *
808 * @param s The string to be literalized
809 * @return A literal string replacement
810 * @since 1.5
811 */
812 public static String quoteReplacement(String s) {
813 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
814 return s;
815 StringBuilder sb = new StringBuilder();
816 for (int i=0; i<s.length(); i++) {
817 char c = s.charAt(i);
818 if (c == '\\' || c == '$') {
819 sb.append('\\');
820 }
821 sb.append(c);
822 }
823 return sb.toString();
824 }
825
826 /**
827 * Implements a non-terminal append-and-replace step.
828 *
829 * <p> This method performs the following actions: </p>
830 *
831 * <ol>
832 *
833 * <li><p> It reads characters from the input sequence, starting at the
834 * append position, and appends them to the given string buffer. It
835 * stops after reading the last character preceding the previous match,
836 * that is, the character at index {@link
837 * #start()} {@code -} {@code 1}. </p></li>
838 *
839 * <li><p> It appends the given replacement string to the string buffer.
840 * </p></li>
841 *
842 * <li><p> It sets the append position of this matcher to the index of
843 * the last character matched, plus one, that is, to {@link #end()}.
844 * </p></li>
845 *
846 * </ol>
847 *
848 * <p> The replacement string may contain references to subsequences
849 * captured during the previous match: Each occurrence of
850 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i>
851 * will be replaced by the result of evaluating the corresponding
852 * {@link #group(String) group(name)} or {@link #group(int) group(g)}
853 * respectively. For {@code $}<i>g</i>,
854 * the first number after the {@code $} is always treated as part of
855 * the group reference. Subsequent numbers are incorporated into g if
856 * they would form a legal group reference. Only the numerals '0'
857 * through '9' are considered as potential components of the group
858 * reference. If the second group matched the string {@code "foo"}, for
859 * example, then passing the replacement string {@code "$2bar"} would
860 * cause {@code "foobar"} to be appended to the string buffer. A dollar
861 * sign ({@code $}) may be included as a literal in the replacement
862 * string by preceding it with a backslash ({@code \$}).
863 *
864 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
865 * the replacement string may cause the results to be different than if it
866 * were being treated as a literal replacement string. Dollar signs may be
867 * treated as references to captured subsequences as described above, and
868 * backslashes are used to escape literal characters in the replacement
869 * string.
870 *
871 * <p> This method is intended to be used in a loop together with the
872 * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find}
873 * methods. The following code, for example, writes {@code one dog two dogs
874 * in the yard} to the standard-output stream: </p>
875 *
876 * <blockquote><pre>
877 * Pattern p = Pattern.compile("cat");
878 * Matcher m = p.matcher("one cat two cats in the yard");
879 * StringBuffer sb = new StringBuffer();
880 * while (m.find()) {
881 * m.appendReplacement(sb, "dog");
882 * }
883 * m.appendTail(sb);
884 * System.out.println(sb.toString());</pre></blockquote>
885 *
886 * @param sb
887 * The target string buffer
888 *
889 * @param replacement
890 * The replacement string
891 *
892 * @return This matcher
893 *
894 * @throws IllegalStateException
895 * If no match has yet been attempted,
896 * or if the previous match operation failed
897 *
898 * @throws IllegalArgumentException
899 * If the replacement string refers to a named-capturing
900 * group that does not exist in the pattern
901 *
902 * @throws IndexOutOfBoundsException
903 * If the replacement string refers to a capturing group
904 * that does not exist in the pattern
905 */
906 public Matcher appendReplacement(StringBuffer sb, String replacement) {
907 // If no match, return error
908 if (first < 0)
909 throw new IllegalStateException("No match available");
910 StringBuilder result = new StringBuilder();
911 appendExpandedReplacement(replacement, result);
912 // Append the intervening text
913 sb.append(text, lastAppendPosition, first);
914 // Append the match substitution
915 sb.append(result);
916 lastAppendPosition = last;
917 modCount++;
918 return this;
919 }
920
921 /**
922 * Implements a non-terminal append-and-replace step.
923 *
924 * <p> This method performs the following actions: </p>
925 *
926 * <ol>
927 *
928 * <li><p> It reads characters from the input sequence, starting at the
929 * append position, and appends them to the given string builder. It
930 * stops after reading the last character preceding the previous match,
931 * that is, the character at index {@link
932 * #start()} {@code -} {@code 1}. </p></li>
933 *
934 * <li><p> It appends the given replacement string to the string builder.
935 * </p></li>
936 *
937 * <li><p> It sets the append position of this matcher to the index of
938 * the last character matched, plus one, that is, to {@link #end()}.
939 * </p></li>
940 *
941 * </ol>
942 *
943 * <p> The replacement string may contain references to subsequences
944 * captured during the previous match: Each occurrence of
945 * {@code $}<i>g</i> will be replaced by the result of
946 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}.
947 * The first number after the {@code $} is always treated as part of
948 * the group reference. Subsequent numbers are incorporated into g if
949 * they would form a legal group reference. Only the numerals '0'
950 * through '9' are considered as potential components of the group
951 * reference. If the second group matched the string {@code "foo"}, for
952 * example, then passing the replacement string {@code "$2bar"} would
953 * cause {@code "foobar"} to be appended to the string builder. A dollar
954 * sign ({@code $}) may be included as a literal in the replacement
955 * string by preceding it with a backslash ({@code \$}).
956 *
957 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
958 * the replacement string may cause the results to be different than if it
959 * were being treated as a literal replacement string. Dollar signs may be
960 * treated as references to captured subsequences as described above, and
961 * backslashes are used to escape literal characters in the replacement
962 * string.
963 *
964 * <p> This method is intended to be used in a loop together with the
965 * {@link #appendTail(StringBuilder) appendTail} and
966 * {@link #find() find} methods. The following code, for example, writes
967 * {@code one dog two dogs in the yard} to the standard-output stream: </p>
968 *
969 * <blockquote><pre>
970 * Pattern p = Pattern.compile("cat");
971 * Matcher m = p.matcher("one cat two cats in the yard");
972 * StringBuilder sb = new StringBuilder();
973 * while (m.find()) {
974 * m.appendReplacement(sb, "dog");
975 * }
976 * m.appendTail(sb);
977 * System.out.println(sb.toString());</pre></blockquote>
978 *
979 * @param sb
980 * The target string builder
981 * @param replacement
982 * The replacement string
983 * @return This matcher
984 *
985 * @throws IllegalStateException
986 * If no match has yet been attempted,
987 * or if the previous match operation failed
988 * @throws IllegalArgumentException
989 * If the replacement string refers to a named-capturing
990 * group that does not exist in the pattern
991 * @throws IndexOutOfBoundsException
992 * If the replacement string refers to a capturing group
993 * that does not exist in the pattern
994 * @since 9
995 */
996 public Matcher appendReplacement(StringBuilder sb, String replacement) {
997 // If no match, return error
998 if (first < 0)
999 throw new IllegalStateException("No match available");
1000 StringBuilder result = new StringBuilder();
1001 appendExpandedReplacement(replacement, result);
1002 // Append the intervening text
1003 sb.append(text, lastAppendPosition, first);
1004 // Append the match substitution
1005 sb.append(result);
1006 lastAppendPosition = last;
1007 modCount++;
1008 return this;
1009 }
1010
1011 /**
1012 * Processes replacement string to replace group references with
1013 * groups.
1014 */
1015 private StringBuilder appendExpandedReplacement(
1016 String replacement, StringBuilder result) {
1017 int cursor = 0;
1018 while (cursor < replacement.length()) {
1019 char nextChar = replacement.charAt(cursor);
1020 if (nextChar == '\\') {
1021 cursor++;
1022 if (cursor == replacement.length())
1023 throw new IllegalArgumentException(
1024 "character to be escaped is missing");
1025 nextChar = replacement.charAt(cursor);
1026 result.append(nextChar);
1027 cursor++;
1028 } else if (nextChar == '$') {
1029 // Skip past $
1030 cursor++;
1031 // Throw IAE if this "$" is the last character in replacement
1032 if (cursor == replacement.length())
1033 throw new IllegalArgumentException(
1034 "Illegal group reference: group index is missing");
1035 nextChar = replacement.charAt(cursor);
1036 int refNum = -1;
1037 if (nextChar == '{') {
1038 cursor++;
1039 StringBuilder gsb = new StringBuilder();
1040 while (cursor < replacement.length()) {
1041 nextChar = replacement.charAt(cursor);
1042 if (ASCII.isLower(nextChar) ||
1043 ASCII.isUpper(nextChar) ||
1044 ASCII.isDigit(nextChar)) {
1045 gsb.append(nextChar);
1046 cursor++;
1047 } else {
1048 break;
1049 }
1050 }
1051 if (gsb.length() == 0)
1052 throw new IllegalArgumentException(
1053 "named capturing group has 0 length name");
1054 if (nextChar != '}')
1055 throw new IllegalArgumentException(
1056 "named capturing group is missing trailing '}'");
1057 String gname = gsb.toString();
1058 if (ASCII.isDigit(gname.charAt(0)))
1059 throw new IllegalArgumentException(
1060 "capturing group name {" + gname +
1061 "} starts with digit character");
1062 if (!parentPattern.namedGroups().containsKey(gname))
1063 throw new IllegalArgumentException(
1064 "No group with name {" + gname + "}");
1065 refNum = parentPattern.namedGroups().get(gname);
1066 cursor++;
1067 } else {
1068 // The first number is always a group
1069 refNum = nextChar - '0';
1070 if ((refNum < 0) || (refNum > 9))
1071 throw new IllegalArgumentException(
1072 "Illegal group reference");
1073 cursor++;
1074 // Capture the largest legal group string
1075 boolean done = false;
1076 while (!done) {
1077 if (cursor >= replacement.length()) {
1078 break;
1079 }
1080 int nextDigit = replacement.charAt(cursor) - '0';
1081 if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
1082 break;
1083 }
1084 int newRefNum = (refNum * 10) + nextDigit;
1085 if (groupCount() < newRefNum) {
1086 done = true;
1087 } else {
1088 refNum = newRefNum;
1089 cursor++;
1090 }
1091 }
1092 }
1093 // Append group
1094 if (start(refNum) != -1 && end(refNum) != -1)
1095 result.append(text, start(refNum), end(refNum));
1096 } else {
1097 result.append(nextChar);
1098 cursor++;
1099 }
1100 }
1101 return result;
1102 }
1103
1104 /**
1105 * Implements a terminal append-and-replace step.
1106 *
1107 * <p> This method reads characters from the input sequence, starting at
1108 * the append position, and appends them to the given string buffer. It is
1109 * intended to be invoked after one or more invocations of the {@link
1110 * #appendReplacement(StringBuffer, String) appendReplacement} method in
1111 * order to copy the remainder of the input sequence. </p>
1112 *
1113 * @param sb
1114 * The target string buffer
1115 *
1116 * @return The target string buffer
1117 */
1118 public StringBuffer appendTail(StringBuffer sb) {
1119 sb.append(text, lastAppendPosition, getTextLength());
1120 return sb;
1121 }
1122
1123 /**
1124 * Implements a terminal append-and-replace step.
1125 *
1126 * <p> This method reads characters from the input sequence, starting at
1127 * the append position, and appends them to the given string builder. It is
1128 * intended to be invoked after one or more invocations of the {@link
1129 * #appendReplacement(StringBuilder, String)
1130 * appendReplacement} method in order to copy the remainder of the input
1131 * sequence. </p>
1132 *
1133 * @param sb
1134 * The target string builder
1135 *
1136 * @return The target string builder
1137 *
1138 * @since 9
1139 */
1140 public StringBuilder appendTail(StringBuilder sb) {
1141 sb.append(text, lastAppendPosition, getTextLength());
1142 return sb;
1143 }
1144
1145 /**
1146 * Replaces every subsequence of the input sequence that matches the
1147 * pattern with the given replacement string.
1148 *
1149 * <p> This method first resets this matcher. It then scans the input
1150 * sequence looking for matches of the pattern. Characters that are not
1151 * part of any match are appended directly to the result string; each match
1152 * is replaced in the result by the replacement string. The replacement
1153 * string may contain references to captured subsequences as in the {@link
1154 * #appendReplacement appendReplacement} method.
1155 *
1156 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1157 * the replacement string may cause the results to be different than if it
1158 * were being treated as a literal replacement string. Dollar signs may be
1159 * treated as references to captured subsequences as described above, and
1160 * backslashes are used to escape literal characters in the replacement
1161 * string.
1162 *
1163 * <p> Given the regular expression {@code a*b}, the input
1164 * {@code "aabfooaabfooabfoob"}, and the replacement string
1165 * {@code "-"}, an invocation of this method on a matcher for that
1166 * expression would yield the string {@code "-foo-foo-foo-"}.
1167 *
1168 * <p> Invoking this method changes this matcher's state. If the matcher
1169 * is to be used in further matching operations then it should first be
1170 * reset. </p>
1171 *
1172 * @param replacement
1173 * The replacement string
1174 *
1175 * @return The string constructed by replacing each matching subsequence
1176 * by the replacement string, substituting captured subsequences
1177 * as needed
1178 */
1179 public String replaceAll(String replacement) {
1180 reset();
1181 boolean result = find();
1182 if (result) {
1183 StringBuilder sb = new StringBuilder();
1184 do {
1185 appendReplacement(sb, replacement);
1186 result = find();
1187 } while (result);
1188 appendTail(sb);
1189 return sb.toString();
1190 }
1191 return text.toString();
1192 }
1193
1194 /**
1195 * Replaces every subsequence of the input sequence that matches the
1196 * pattern with the result of applying the given replacer function to the
1197 * match result of this matcher corresponding to that subsequence.
1198 * Exceptions thrown by the function are relayed to the caller.
1199 *
1200 * <p> This method first resets this matcher. It then scans the input
1201 * sequence looking for matches of the pattern. Characters that are not
1202 * part of any match are appended directly to the result string; each match
 * is replaced in the result by applying the replacer function that
1204 * returns a replacement string. Each replacement string may contain
1205 * references to captured subsequences as in the {@link #appendReplacement
1206 * appendReplacement} method.
1207 *
1208 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1209 * a replacement string may cause the results to be different than if it
1210 * were being treated as a literal replacement string. Dollar signs may be
1211 * treated as references to captured subsequences as described above, and
1212 * backslashes are used to escape literal characters in the replacement
1213 * string.
1214 *
1215 * <p> Given the regular expression {@code dog}, the input
1216 * {@code "zzzdogzzzdogzzz"}, and the function
1217 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1218 * a matcher for that expression would yield the string
1219 * {@code "zzzDOGzzzDOGzzz"}.
1220 *
1221 * <p> Invoking this method changes this matcher's state. If the matcher
1222 * is to be used in further matching operations then it should first be
1223 * reset. </p>
1224 *
1225 * <p> The replacer function should not modify this matcher's state during
1226 * replacement. This method will, on a best-effort basis, throw a
1227 * {@link java.util.ConcurrentModificationException} if such modification is
1228 * detected.
1229 *
1230 * <p> The state of each match result passed to the replacer function is
1231 * guaranteed to be constant only for the duration of the replacer function
1232 * call and only if the replacer function does not modify this matcher's
1233 * state.
1234 *
1235 * @implNote
1236 * This implementation applies the replacer function to this matcher, which
1237 * is an instance of {@code MatchResult}.
1238 *
1239 * @param replacer
1240 * The function to be applied to the match result of this matcher
1241 * that returns a replacement string.
1242 * @return The string constructed by replacing each matching subsequence
1243 * with the result of applying the replacer function to that
1244 * matched subsequence, substituting captured subsequences as
1245 * needed.
1246 * @throws NullPointerException if the replacer function is null
1247 * @throws ConcurrentModificationException if it is detected, on a
1248 * best-effort basis, that the replacer function modified this
1249 * matcher's state
1250 * @since 9
1251 */
1252 public String replaceAll(Function<MatchResult, String> replacer) {
1253 Objects.requireNonNull(replacer);
1254 reset();
1255 boolean result = find();
1256 if (result) {
1257 StringBuilder sb = new StringBuilder();
1258 do {
1259 int ec = modCount;
1260 String replacement = replacer.apply(this);
1261 if (ec != modCount)
1262 throw new ConcurrentModificationException();
1263 appendReplacement(sb, replacement);
1264 result = find();
1265 } while (result);
1266 appendTail(sb);
1267 return sb.toString();
1268 }
1269 return text.toString();
1270 }
1271
1272 /**
1273 * Returns a stream of match results for each subsequence of the input
1274 * sequence that matches the pattern. The match results occur in the
1275 * same order as the matching subsequences in the input sequence.
1276 *
1277 * <p> Each match result is produced as if by {@link #toMatchResult()}.
1278 *
1279 * <p> This method does not reset this matcher. Matching starts on
1280 * initiation of the terminal stream operation either at the beginning of
1281 * this matcher's region, or, if the matcher has not since been reset, at
1282 * the first character not matched by a previous match.
1283 *
1284 * <p> If the matcher is to be used for further matching operations after
1285 * the terminal stream operation completes then it should be first reset.
1286 *
1287 * <p> This matcher's state should not be modified during execution of the
1288 * returned stream's pipeline. The returned stream's source
1289 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort
1290 * basis, throw a {@link java.util.ConcurrentModificationException} if such
1291 * modification is detected.
1292 *
1293 * @return a sequential stream of match results.
1294 * @since 9
1295 */
1296 public Stream<MatchResult> results() {
1297 class MatchResultIterator implements Iterator<MatchResult> {
1298 // -ve for call to find, 0 for not found, 1 for found
1299 int state = -1;
1300 // State for concurrent modification checking
1301 // -1 for uninitialized
1302 int expectedCount = -1;
1303 // The input sequence as a string, set once only after first find
1304 // Avoids repeated conversion from CharSequence for each match
1305 String textAsString;
1306
1307 @Override
1308 public MatchResult next() {
1309 if (expectedCount >= 0 && expectedCount != modCount)
1310 throw new ConcurrentModificationException();
1311
1312 if (!hasNext())
1313 throw new NoSuchElementException();
1314
1315 state = -1;
1316 return toMatchResult(textAsString);
1317 }
1318
1319 @Override
1320 public boolean hasNext() {
1321 if (state >= 0)
1322 return state == 1;
1323
1324 // Defer throwing ConcurrentModificationException to when next
1325 // or forEachRemaining is called. The is consistent with other
1326 // fail-fast implementations.
1327 if (expectedCount >= 0 && expectedCount != modCount)
1328 return true;
1329
1330 boolean found = find();
1331 // Capture the input sequence as a string on first find
1332 if (found && state < 0)
1333 textAsString = text.toString();
1334 state = found ? 1 : 0;
1335 expectedCount = modCount;
1336 return found;
1337 }
1338
1339 @Override
1340 public void forEachRemaining(Consumer<? super MatchResult> action) {
1341 if (expectedCount >= 0 && expectedCount != modCount)
1342 throw new ConcurrentModificationException();
1343
1344 int s = state;
1345 if (s == 0)
1346 return;
1347
1348 // Set state to report no more elements on further operations
1349 state = 0;
1350 expectedCount = -1;
1351
1352 // Perform a first find if required
1353 if (s < 0 && !find())
1354 return;
1355
1356 // Capture the input sequence as a string on first find
1357 textAsString = text.toString();
1358
1359 do {
1360 int ec = modCount;
1361 action.accept(toMatchResult(textAsString));
1362 if (ec != modCount)
1363 throw new ConcurrentModificationException();
1364 } while (find());
1365 }
1366 }
1367 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1368 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1369 }
1370
1371 /**
1372 * Replaces the first subsequence of the input sequence that matches the
1373 * pattern with the given replacement string.
1374 *
1375 * <p> This method first resets this matcher. It then scans the input
1376 * sequence looking for a match of the pattern. Characters that are not
1377 * part of the match are appended directly to the result string; the match
1378 * is replaced in the result by the replacement string. The replacement
1379 * string may contain references to captured subsequences as in the {@link
1380 * #appendReplacement appendReplacement} method.
1381 *
1382 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1383 * the replacement string may cause the results to be different than if it
1384 * were being treated as a literal replacement string. Dollar signs may be
1385 * treated as references to captured subsequences as described above, and
1386 * backslashes are used to escape literal characters in the replacement
1387 * string.
1388 *
1389 * <p> Given the regular expression {@code dog}, the input
1390 * {@code "zzzdogzzzdogzzz"}, and the replacement string
1391 * {@code "cat"}, an invocation of this method on a matcher for that
1392 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p>
1393 *
1394 * <p> Invoking this method changes this matcher's state. If the matcher
1395 * is to be used in further matching operations then it should first be
1396 * reset. </p>
1397 *
1398 * @param replacement
1399 * The replacement string
1400 * @return The string constructed by replacing the first matching
1401 * subsequence by the replacement string, substituting captured
1402 * subsequences as needed
1403 */
1404 public String replaceFirst(String replacement) {
1405 if (replacement == null)
1406 throw new NullPointerException("replacement");
1407 reset();
1408 if (!find())
1409 return text.toString();
1410 StringBuilder sb = new StringBuilder();
1411 appendReplacement(sb, replacement);
1412 appendTail(sb);
1413 return sb.toString();
1414 }
1415
1416 /**
1417 * Replaces the first subsequence of the input sequence that matches the
1418 * pattern with the result of applying the given replacer function to the
1419 * match result of this matcher corresponding to that subsequence.
 * Exceptions thrown by the replacer function are relayed to the caller.
1421 *
1422 * <p> This method first resets this matcher. It then scans the input
1423 * sequence looking for a match of the pattern. Characters that are not
1424 * part of the match are appended directly to the result string; the match
 * is replaced in the result by applying the replacer function that
1426 * returns a replacement string. The replacement string may contain
1427 * references to captured subsequences as in the {@link #appendReplacement
1428 * appendReplacement} method.
1429 *
1430 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1431 * the replacement string may cause the results to be different than if it
1432 * were being treated as a literal replacement string. Dollar signs may be
1433 * treated as references to captured subsequences as described above, and
1434 * backslashes are used to escape literal characters in the replacement
1435 * string.
1436 *
1437 * <p> Given the regular expression {@code dog}, the input
1438 * {@code "zzzdogzzzdogzzz"}, and the function
1439 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1440 * a matcher for that expression would yield the string
1441 * {@code "zzzDOGzzzdogzzz"}.
1442 *
1443 * <p> Invoking this method changes this matcher's state. If the matcher
1444 * is to be used in further matching operations then it should first be
1445 * reset.
1446 *
1447 * <p> The replacer function should not modify this matcher's state during
1448 * replacement. This method will, on a best-effort basis, throw a
1449 * {@link java.util.ConcurrentModificationException} if such modification is
1450 * detected.
1451 *
1452 * <p> The state of the match result passed to the replacer function is
1453 * guaranteed to be constant only for the duration of the replacer function
1454 * call and only if the replacer function does not modify this matcher's
1455 * state.
1456 *
1457 * @implNote
1458 * This implementation applies the replacer function to this matcher, which
1459 * is an instance of {@code MatchResult}.
1460 *
1461 * @param replacer
1462 * The function to be applied to the match result of this matcher
1463 * that returns a replacement string.
1464 * @return The string constructed by replacing the first matching
1465 * subsequence with the result of applying the replacer function to
1466 * the matched subsequence, substituting captured subsequences as
1467 * needed.
1468 * @throws NullPointerException if the replacer function is null
1469 * @throws ConcurrentModificationException if it is detected, on a
1470 * best-effort basis, that the replacer function modified this
1471 * matcher's state
1472 * @since 9
1473 */
1474 public String replaceFirst(Function<MatchResult, String> replacer) {
1475 Objects.requireNonNull(replacer);
1476 reset();
1477 if (!find())
1478 return text.toString();
1479 StringBuilder sb = new StringBuilder();
1480 int ec = modCount;
1481 String replacement = replacer.apply(this);
1482 if (ec != modCount)
1483 throw new ConcurrentModificationException();
1484 appendReplacement(sb, replacement);
1485 appendTail(sb);
1486 return sb.toString();
1487 }
1488
1489 /**
1490 * Sets the limits of this matcher's region. The region is the part of the
1491 * input sequence that will be searched to find a match. Invoking this
1492 * method resets the matcher, and then sets the region to start at the
1493 * index specified by the {@code start} parameter and end at the
1494 * index specified by the {@code end} parameter.
1495 *
1496 * <p>Depending on the transparency and anchoring being used (see
1497 * {@link #useTransparentBounds(boolean) useTransparentBounds} and
1498 * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain
1499 * constructs such as anchors may behave differently at or around the
1500 * boundaries of the region.
1501 *
1502 * @param start
1503 * The index to start searching at (inclusive)
1504 * @param end
1505 * The index to end searching at (exclusive)
1506 * @throws IndexOutOfBoundsException
1507 * If start or end is less than zero, if
1508 * start is greater than the length of the input sequence, if
1509 * end is greater than the length of the input sequence, or if
1510 * start is greater than end.
1511 * @return this matcher
1512 * @since 1.5
1513 */
1514 public Matcher region(int start, int end) {
1515 if ((start < 0) || (start > getTextLength()))
1516 throw new IndexOutOfBoundsException("start");
1517 if ((end < 0) || (end > getTextLength()))
1518 throw new IndexOutOfBoundsException("end");
1519 if (start > end)
1520 throw new IndexOutOfBoundsException("start > end");
1521 reset();
1522 from = start;
1523 to = end;
1524 return this;
1525 }
1526
1527 /**
1528 * Reports the start index of this matcher's region. The
1529 * searches this matcher conducts are limited to finding matches
1530 * within {@link #regionStart() regionStart} (inclusive) and
1531 * {@link #regionEnd() regionEnd} (exclusive).
1532 *
1533 * @return The starting point of this matcher's region
1534 * @since 1.5
1535 */
1536 public int regionStart() {
1537 return from;
1538 }
1539
1540 /**
1541 * Reports the end index (exclusive) of this matcher's region.
1542 * The searches this matcher conducts are limited to finding matches
1543 * within {@link #regionStart() regionStart} (inclusive) and
1544 * {@link #regionEnd() regionEnd} (exclusive).
1545 *
1546 * @return the ending point of this matcher's region
1547 * @since 1.5
1548 */
1549 public int regionEnd() {
1550 return to;
1551 }
1552
1553 /**
1554 * Queries the transparency of region bounds for this matcher.
1555 *
1556 * <p> This method returns {@code true} if this matcher uses
1557 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
1558 * bounds.
1559 *
1560 * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a
1561 * description of transparent and opaque bounds.
1562 *
1563 * <p> By default, a matcher uses opaque region boundaries.
1564 *
1565 * @return {@code true} iff this matcher is using transparent bounds,
1566 * {@code false} otherwise.
1567 * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1568 * @since 1.5
1569 */
1570 public boolean hasTransparentBounds() {
1571 return transparentBounds;
1572 }
1573
1574 /**
1575 * Sets the transparency of region bounds for this matcher.
1576 *
1577 * <p> Invoking this method with an argument of {@code true} will set this
1578 * matcher to use <i>transparent</i> bounds. If the boolean
1579 * argument is {@code false}, then <i>opaque</i> bounds will be used.
1580 *
1581 * <p> Using transparent bounds, the boundaries of this
1582 * matcher's region are transparent to lookahead, lookbehind,
1583 * and boundary matching constructs. Those constructs can see beyond the
1584 * boundaries of the region to see if a match is appropriate.
1585 *
1586 * <p> Using opaque bounds, the boundaries of this matcher's
1587 * region are opaque to lookahead, lookbehind, and boundary matching
1588 * constructs that may try to see beyond them. Those constructs cannot
1589 * look past the boundaries so they will fail to match anything outside
1590 * of the region.
1591 *
1592 * <p> By default, a matcher uses opaque bounds.
1593 *
1594 * @param b a boolean indicating whether to use opaque or transparent
1595 * regions
1596 * @return this matcher
1597 * @see java.util.regex.Matcher#hasTransparentBounds
1598 * @since 1.5
1599 */
1600 public Matcher useTransparentBounds(boolean b) {
1601 transparentBounds = b;
1602 return this;
1603 }
1604
1605 /**
1606 * Queries the anchoring of region bounds for this matcher.
1607 *
1608 * <p> This method returns {@code true} if this matcher uses
1609 * <i>anchoring</i> bounds, {@code false} otherwise.
1610 *
1611 * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a
1612 * description of anchoring bounds.
1613 *
1614 * <p> By default, a matcher uses anchoring region boundaries.
1615 *
1616 * @return {@code true} iff this matcher is using anchoring bounds,
1617 * {@code false} otherwise.
1618 * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1619 * @since 1.5
1620 */
1621 public boolean hasAnchoringBounds() {
1622 return anchoringBounds;
1623 }
1624
1625 /**
1626 * Sets the anchoring of region bounds for this matcher.
1627 *
1628 * <p> Invoking this method with an argument of {@code true} will set this
1629 * matcher to use <i>anchoring</i> bounds. If the boolean
1630 * argument is {@code false}, then <i>non-anchoring</i> bounds will be
1631 * used.
1632 *
1633 * <p> Using anchoring bounds, the boundaries of this
1634 * matcher's region match anchors such as ^ and $.
1635 *
1636 * <p> Without anchoring bounds, the boundaries of this
1637 * matcher's region will not match anchors such as ^ and $.
1638 *
1639 * <p> By default, a matcher uses anchoring region boundaries.
1640 *
1641 * @param b a boolean indicating whether or not to use anchoring bounds.
1642 * @return this matcher
1643 * @see java.util.regex.Matcher#hasAnchoringBounds
1644 * @since 1.5
1645 */
1646 public Matcher useAnchoringBounds(boolean b) {
1647 anchoringBounds = b;
1648 return this;
1649 }
1650
1651 /**
1652 * <p>Returns the string representation of this matcher. The
1653 * string representation of a {@code Matcher} contains information
1654 * that may be useful for debugging. The exact format is unspecified.
1655 *
1656 * @return The string representation of this matcher
1657 * @since 1.5
1658 */
1659 public String toString() {
1660 StringBuilder sb = new StringBuilder();
1661 sb.append("java.util.regex.Matcher")
1662 .append("[pattern=").append(pattern())
1663 .append(" region=")
1664 .append(regionStart()).append(',').append(regionEnd())
1665 .append(" lastmatch=");
1666 if ((first >= 0) && (group() != null)) {
1667 sb.append(group());
1668 }
1669 sb.append(']');
1670 return sb.toString();
1671 }
1672
1673 /**
1674 * <p>Returns true if the end of input was hit by the search engine in
1675 * the last match operation performed by this matcher.
1676 *
1677 * <p>When this method returns true, then it is possible that more input
1678 * would have changed the result of the last search.
1679 *
1680 * @return true iff the end of input was hit in the last match; false
1681 * otherwise
1682 * @since 1.5
1683 */
1684 public boolean hitEnd() {
1685 return hitEnd;
1686 }
1687
1688 /**
1689 * <p>Returns true if more input could change a positive match into a
1690 * negative one.
1691 *
1692 * <p>If this method returns true, and a match was found, then more
1693 * input could cause the match to be lost. If this method returns false
1694 * and a match was found, then more input might change the match but the
1695 * match won't be lost. If a match was not found, then requireEnd has no
1696 * meaning.
1697 *
1698 * @return true iff more input could change a positive match into a
1699 * negative one.
1700 * @since 1.5
1701 */
1702 public boolean requireEnd() {
1703 return requireEnd;
1704 }
1705
1706 /**
1707 * Initiates a search to find a Pattern within the given bounds.
1708 * The groups are filled with default values and the match of the root
1709 * of the state machine is called. The state machine will hold the state
1710 * of the match as it proceeds in this matcher.
1711 *
1712 * Matcher.from is not set here, because it is the "hard" boundary
1713 * of the start of the search which anchors will set to. The from param
1714 * is the "soft" boundary of the start of the search, meaning that the
1715 * regex tries to match at that index but ^ won't match there. Subsequent
1716 * calls to the search methods start at a new "soft" boundary which is
1717 * the end of the previous match.
1718 */
1719 boolean search(int from) {
1720 this.hitEnd = false;
1721 this.requireEnd = false;
1722 from = from < 0 ? 0 : from;
1723 this.first = from;
1724 this.oldLast = oldLast < 0 ? from : oldLast;
1725 for (int i = 0; i < groups.length; i++)
1726 groups[i] = -1;
1727 for (int i = 0; i < localsPos.length; i++) {
1728 if (localsPos[i] != null)
1729 localsPos[i].clear();
1730 }
1731 acceptMode = NOANCHOR;
1732 boolean result = parentPattern.root.match(this, from, text);
1733 if (!result)
1734 this.first = -1;
1735 this.oldLast = this.last;
1736 this.modCount++;
1737 return result;
1738 }
1739
1740 /**
1741 * Initiates a search for an anchored match to a Pattern within the given
1742 * bounds. The groups are filled with default values and the match of the
1743 * root of the state machine is called. The state machine will hold the
1744 * state of the match as it proceeds in this matcher.
1745 */
1746 boolean match(int from, int anchor) {
1747 this.hitEnd = false;
1748 this.requireEnd = false;
1749 from = from < 0 ? 0 : from;
1750 this.first = from;
1751 this.oldLast = oldLast < 0 ? from : oldLast;
1752 for (int i = 0; i < groups.length; i++)
1753 groups[i] = -1;
1754 for (int i = 0; i < localsPos.length; i++) {
1755 if (localsPos[i] != null)
1756 localsPos[i].clear();
1757 }
1758 acceptMode = anchor;
1759 boolean result = parentPattern.matchRoot.match(this, from, text);
1760 if (!result)
1761 this.first = -1;
1762 this.oldLast = this.last;
1763 this.modCount++;
1764 return result;
1765 }
1766
1767 /**
1768 * Returns the end index of the text.
1769 *
1770 * @return the index after the last character in the text
1771 */
1772 int getTextLength() {
1773 return text.length();
1774 }
1775
1776 /**
1777 * Generates a String from this matcher's input in the specified range.
1778 *
1779 * @param beginIndex the beginning index, inclusive
1780 * @param endIndex the ending index, exclusive
1781 * @return A String generated from this matcher's input
1782 */
1783 CharSequence getSubSequence(int beginIndex, int endIndex) {
1784 return text.subSequence(beginIndex, endIndex);
1785 }
1786
1787 /**
1788 * Returns this matcher's input character at index i.
1789 *
1790 * @return A char from the specified index
1791 */
1792 char charAt(int i) {
1793 return text.charAt(i);
1794 }
1795
1796 /**
1797 * Returns the group index of the matched capturing group.
1798 *
1799 * @return the index of the named-capturing group
1800 */
1801 int getMatchedGroupIndex(String name) {
1802 Objects.requireNonNull(name, "Group name");
1803 if (first < 0)
1804 throw new IllegalStateException("No match found");
1805 if (!parentPattern.namedGroups().containsKey(name))
1806 throw new IllegalArgumentException("No group with name <" + name + ">");
1807 return parentPattern.namedGroups().get(name);
1808 }
1809 }