001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019/**
020 * <p>Operations on {@link CharSequence} that are
021 * {@code null} safe.</p>
022 *
023 * @see CharSequence
024 * @since 3.0
025 */
026public class CharSequenceUtils {
027
028    private static final int NOT_FOUND = -1;
029
030    /**
031     * <p>{@code CharSequenceUtils} instances should NOT be constructed in
032     * standard programming. </p>
033     *
034     * <p>This constructor is public to permit tools that require a JavaBean
035     * instance to operate.</p>
036     */
037    public CharSequenceUtils() {
038    }
039
040    //-----------------------------------------------------------------------
041    /**
042     * <p>Returns a new {@code CharSequence} that is a subsequence of this
043     * sequence starting with the {@code char} value at the specified index.</p>
044     *
045     * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
046     * The length (in {@code char}) of the returned sequence is {@code length() - start},
047     * so if {@code start == end} then an empty sequence is returned.</p>
048     *
049     * @param cs  the specified subsequence, null returns null
050     * @param start  the start index, inclusive, valid
051     * @return a new subsequence, may be null
052     * @throws IndexOutOfBoundsException if {@code start} is negative or if
053     *  {@code start} is greater than {@code length()}
054     */
055    public static CharSequence subSequence(final CharSequence cs, final int start) {
056        return cs == null ? null : cs.subSequence(start, cs.length());
057    }
058
059    //-----------------------------------------------------------------------
060    /**
061     * Returns the index within {@code cs} of the first occurrence of the
062     * specified character, starting the search at the specified index.
063     * <p>
064     * If a character with value {@code searchChar} occurs in the
065     * character sequence represented by the {@code cs}
066     * object at an index no smaller than {@code start}, then
067     * the index of the first such occurrence is returned. For values
068     * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive),
069     * this is the smallest value <i>k</i> such that:
070     * <blockquote><pre>
071     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
072     * </pre></blockquote>
073     * is true. For other values of {@code searchChar}, it is the
074     * smallest value <i>k</i> such that:
075     * <blockquote><pre>
076     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
077     * </pre></blockquote>
078     * is true. In either case, if no such character occurs inm {@code cs}
079     * at or after position {@code start}, then
080     * {@code -1} is returned.
081     *
082     * <p>
083     * There is no restriction on the value of {@code start}. If it
084     * is negative, it has the same effect as if it were zero: the entire
085     * {@code CharSequence} may be searched. If it is greater than
086     * the length of {@code cs}, it has the same effect as if it were
087     * equal to the length of {@code cs}: {@code -1} is returned.
088     *
089     * <p>All indices are specified in {@code char} values
090     * (Unicode code units).
091     *
092     * @param cs  the {@code CharSequence} to be processed, not null
093     * @param searchChar  the char to be searched for
094     * @param start  the start index, negative starts at the string start
095     * @return the index where the search char was found, -1 if not found
096     * @since 3.6 updated to behave more like {@code String}
097     */
098    static int indexOf(final CharSequence cs, final int searchChar, int start) {
099        if (cs instanceof String) {
100            return ((String) cs).indexOf(searchChar, start);
101        }
102        final int sz = cs.length();
103        if (start < 0) {
104            start = 0;
105        }
106        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
107            for (int i = start; i < sz; i++) {
108                if (cs.charAt(i) == searchChar) {
109                    return i;
110                }
111            }
112            return NOT_FOUND;
113        }
114        //supplementary characters (LANG1300)
115        if (searchChar <= Character.MAX_CODE_POINT) {
116            final char[] chars = Character.toChars(searchChar);
117            for (int i = start; i < sz - 1; i++) {
118                final char high = cs.charAt(i);
119                final char low = cs.charAt(i + 1);
120                if (high == chars[0] && low == chars[1]) {
121                    return i;
122                }
123            }
124        }
125        return NOT_FOUND;
126    }
127
128    /**
129     * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
130     *
131     * @param cs the {@code CharSequence} to be processed
132     * @param searchChar the {@code CharSequence} to be searched for
133     * @param start the start index
134     * @return the index where the search sequence was found
135     */
136    static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
137        if (cs instanceof String) {
138            return ((String) cs).indexOf(searchChar.toString(), start);
139        } else if (cs instanceof StringBuilder) {
140            return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
141        } else if (cs instanceof StringBuffer) {
142            return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
143        }
144        return cs.toString().indexOf(searchChar.toString(), start);
145//        if (cs instanceof String && searchChar instanceof String) {
146//            // TODO: Do we assume searchChar is usually relatively small;
147//            //       If so then calling toString() on it is better than reverting to
148//            //       the green implementation in the else block
149//            return ((String) cs).indexOf((String) searchChar, start);
150//        } else {
151//            // TODO: Implement rather than convert to String
152//            return cs.toString().indexOf(searchChar.toString(), start);
153//        }
154    }
155
156    /**
157     * Returns the index within {@code cs} of the last occurrence of
158     * the specified character, searching backward starting at the
159     * specified index. For values of {@code searchChar} in the range
160     * from 0 to 0xFFFF (inclusive), the index returned is the largest
161     * value <i>k</i> such that:
162     * <blockquote><pre>
163     * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
164     * </pre></blockquote>
165     * is true. For other values of {@code searchChar}, it is the
166     * largest value <i>k</i> such that:
167     * <blockquote><pre>
168     * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
169     * </pre></blockquote>
170     * is true. In either case, if no such character occurs in {@code cs}
171     * at or before position {@code start}, then {@code -1} is returned.
172     *
173     * <p>All indices are specified in {@code char} values
174     * (Unicode code units).
175     *
176     * @param cs  the {@code CharSequence} to be processed
177     * @param searchChar  the char to be searched for
178     * @param start  the start index, negative returns -1, beyond length starts at end
179     * @return the index where the search char was found, -1 if not found
180     * @since 3.6 updated to behave more like {@code String}
181     */
182    static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
183        if (cs instanceof String) {
184            return ((String) cs).lastIndexOf(searchChar, start);
185        }
186        final int sz = cs.length();
187        if (start < 0) {
188            return NOT_FOUND;
189        }
190        if (start >= sz) {
191            start = sz - 1;
192        }
193        if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
194            for (int i = start; i >= 0; --i) {
195                if (cs.charAt(i) == searchChar) {
196                    return i;
197                }
198            }
199            return NOT_FOUND;
200        }
201        //supplementary characters (LANG1300)
202        //NOTE - we must do a forward traversal for this to avoid duplicating code points
203        if (searchChar <= Character.MAX_CODE_POINT) {
204            final char[] chars = Character.toChars(searchChar);
205            //make sure it's not the last index
206            if (start == sz - 1) {
207                return NOT_FOUND;
208            }
209            for (int i = start; i >= 0; i--) {
210                final char high = cs.charAt(i);
211                final char low = cs.charAt(i + 1);
212                if (chars[0] == high && chars[1] == low) {
213                    return i;
214                }
215            }
216        }
217        return NOT_FOUND;
218    }
219
220    static final int TO_STRING_LIMIT = 16;
221
222    /**
223     * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
224     *
225     * @param cs the {@code CharSequence} to be processed
226     * @param searchChar the {@code CharSequence} to find
227     * @param start the start index
228     * @return the index where the search sequence was found
229     */
230    static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
231        if (searchChar == null || cs == null) {
232            return NOT_FOUND;
233        }
234        if (searchChar instanceof String) {
235            if (cs instanceof String) {
236                return ((String) cs).lastIndexOf((String) searchChar, start);
237            } else if (cs instanceof StringBuilder) {
238                return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
239            } else if (cs instanceof StringBuffer) {
240                return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
241            }
242        }
243
244        final int len1 = cs.length();
245        final int len2 = searchChar.length();
246
247        if (start > len1) {
248            start = len1;
249        }
250
251        if (start < 0 || len2 < 0 || len2 > len1) {
252            return NOT_FOUND;
253        }
254
255        if (len2 == 0) {
256            return start;
257        }
258
259        if (len2 <= TO_STRING_LIMIT) {
260            if (cs instanceof String) {
261                return ((String) cs).lastIndexOf(searchChar.toString(), start);
262            } else if (cs instanceof StringBuilder) {
263                return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
264            } else if (cs instanceof StringBuffer) {
265                return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
266            }
267        }
268
269        if (start + len2 > len1) {
270            start = len1 - len2;
271        }
272
273        final char char0 = searchChar.charAt(0);
274
275        int i = start;
276        while (true) {
277            while (cs.charAt(i) != char0) {
278                i--;
279                if (i < 0) {
280                    return NOT_FOUND;
281                }
282            }
283            if (checkLaterThan1(cs, searchChar, len2, i)) {
284                return i;
285            }
286            i--;
287            if (i < 0) {
288                return NOT_FOUND;
289            }
290        }
291    }
292
293    private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
294        for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
295            if (cs.charAt(start1 + i) != searchChar.charAt(i)
296                    ||
297                    cs.charAt(start1 + j) != searchChar.charAt(j)
298            ) {
299                return false;
300            }
301        }
302        return true;
303    }
304
305    /**
306     * Converts the given CharSequence to a char[].
307     *
308     * @param source the {@code CharSequence} to be processed.
309     * @return the resulting char array, never null.
310     * @since 3.11
311     */
312    public static char[] toCharArray(final CharSequence source) {
313        final int len = StringUtils.length(source);
314        if (len == 0) {
315            return ArrayUtils.EMPTY_CHAR_ARRAY;
316        }
317        if (source instanceof String) {
318            return ((String) source).toCharArray();
319        }
320        final char[] array = new char[len];
321        for (int i = 0; i < len; i++) {
322            array[i] = source.charAt(i);
323        }
324        return array;
325    }
326
327    /**
328     * Green implementation of regionMatches.
329     *
330     * @param cs the {@code CharSequence} to be processed
331     * @param ignoreCase whether or not to be case insensitive
332     * @param thisStart the index to start on the {@code cs} CharSequence
333     * @param substring the {@code CharSequence} to be looked for
334     * @param start the index to start on the {@code substring} CharSequence
335     * @param length character length of the region
336     * @return whether the region matched
337     */
338    static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
339            final CharSequence substring, final int start, final int length)    {
340        if (cs instanceof String && substring instanceof String) {
341            return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
342        }
343        int index1 = thisStart;
344        int index2 = start;
345        int tmpLen = length;
346
347        // Extract these first so we detect NPEs the same as the java.lang.String version
348        final int srcLen = cs.length() - thisStart;
349        final int otherLen = substring.length() - start;
350
351        // Check for invalid parameters
352        if (thisStart < 0 || start < 0 || length < 0) {
353            return false;
354        }
355
356        // Check that the regions are long enough
357        if (srcLen < length || otherLen < length) {
358            return false;
359        }
360
361        while (tmpLen-- > 0) {
362            final char c1 = cs.charAt(index1++);
363            final char c2 = substring.charAt(index2++);
364
365            if (c1 == c2) {
366                continue;
367            }
368
369            if (!ignoreCase) {
370                return false;
371            }
372
373            // The real same check as in String.regionMatches():
374            final char u1 = Character.toUpperCase(c1);
375            final char u2 = Character.toUpperCase(c2);
376            if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
377                return false;
378            }
379        }
380
381        return true;
382    }
383}