From ee7cdcc0c8efda0da206adad654817a55b5a1149 Mon Sep 17 00:00:00 2001 From: Josef Haider Date: Wed, 5 Nov 2025 17:04:44 +0100 Subject: [PATCH] TruffleString and JSON optimizations --- .../oracle/graal/python/pegparser/Parser.java | 2 +- .../builtins/modules/ArrayModuleBuiltins.java | 4 +- .../modules/BinasciiModuleBuiltins.java | 4 +- .../builtins/modules/BuiltinFunctions.java | 7 +- .../builtins/modules/SysModuleBuiltins.java | 4 +- .../modules/cext/PythonCextBuiltins.java | 2 +- .../cext/PythonCextUnicodeBuiltins.java | 7 +- .../builtins/modules/codecs/CharmapNodes.java | 37 +- .../modules/codecs/ErrorHandlers.java | 30 +- .../modules/csv/CSVDialectBuiltins.java | 6 +- .../builtins/modules/csv/CSVReader.java | 8 +- .../modules/csv/CSVReaderBuiltins.java | 2 +- .../modules/csv/CSVWriterBuiltins.java | 13 +- .../modules/functools/PartialBuiltins.java | 10 +- .../io/IncrementalNewlineDecoderBuiltins.java | 19 +- .../python/builtins/modules/io/PStringIO.java | 17 +- .../builtins/modules/io/StringIOBuiltins.java | 15 +- .../modules/io/TextIOWrapperBuiltins.java | 9 +- .../modules/io/TextIOWrapperNodes.java | 21 +- .../modules/json/JSONEncoderBuiltins.java | 599 ++++++----- .../modules/json/JSONModuleBuiltins.java | 79 +- .../modules/json/JSONScannerBuiltins.java | 931 +++++++++++++----- .../builtins/modules/json/JSONUtils.java | 212 ++-- .../builtins/modules/json/PJSONEncoder.java | 10 +- .../builtins/modules/pickle/PPickler.java | 2 +- .../builtins/modules/pickle/PickleUtils.java | 4 +- .../builtins/modules/pickle/PicklerNodes.java | 10 +- .../builtins/modules/re/PatternBuiltins.java | 21 +- .../builtins/objects/array/ArrayBuiltins.java | 7 +- .../objects/bytes/ByteArrayBuiltins.java | 6 +- .../objects/bytes/BytesCommonBuiltins.java | 4 +- .../builtins/objects/bytes/BytesNodes.java | 3 +- .../builtins/objects/bytes/BytesUtils.java | 9 +- .../builtins/objects/cext/capi/CExtNodes.java | 4 +- .../objects/common/BufferStorageNodes.java | 4 +- .../builtins/objects/deque/DequeBuiltins.java | 4 +- .../objects/dict/DictReprBuiltin.java | 16 +- .../exception/BaseExceptionBuiltins.java | 4 +- .../exception/BaseExceptionGroupBuiltins.java | 4 +- .../objects/exception/ExceptionNodes.java | 3 +- .../exception/UnicodeEncodeErrorBuiltins.java | 4 +- .../UnicodeTranslateErrorBuiltins.java | 4 +- .../builtins/objects/floats/FloatUtils.java | 3 +- .../function/AbstractFunctionBuiltins.java | 3 +- .../builtins/objects/list/ListBuiltins.java | 5 +- .../memoryview/MemoryViewBuiltins.java | 6 +- .../objects/method/ClassmethodBuiltins.java | 3 +- .../objects/method/StaticmethodBuiltins.java | 3 +- .../namespace/SimpleNamespaceBuiltins.java | 6 +- .../builtins/objects/object/ObjectNodes.java | 3 +- .../ordereddict/OrderedDictBuiltins.java | 6 +- .../builtins/objects/set/BaseSetBuiltins.java | 8 +- .../python/builtins/objects/str/PString.java | 4 +- .../builtins/objects/str/StringBuiltins.java | 81 +- .../builtins/objects/str/StringNodes.java | 27 +- .../builtins/objects/str/StringUtils.java | 33 +- .../objects/tuple/StructSequenceBuiltins.java | 4 +- .../builtins/objects/tuple/TupleBuiltins.java | 4 +- .../objects/type/PythonManagedClass.java | 6 +- .../python/builtins/objects/type/TpSlots.java | 6 +- .../objects/types/GenericAliasBuiltins.java | 7 +- .../objects/types/GenericTypeNodes.java | 4 +- .../objects/types/UnionTypeBuiltins.java | 5 +- .../graal/python/compiler/Unparser.java | 11 +- .../python/lib/PyMemoryViewFromObject.java | 2 +- .../graal/python/lib/PyObjectFunctionStr.java | 5 +- .../graal/python/lib/PyObjectGetAttr.java | 2 +- .../graal/python/lib/PyObjectGetAttrO.java | 2 +- .../graal/python/lib/PyObjectLookupAttr.java | 10 +- .../graal/python/lib/PyObjectLookupAttrO.java | 2 +- .../python/lib/PyUnicodeReadCharNode.java | 4 +- .../nodes/argument/CreateArgumentsNode.java | 3 +- .../python/nodes/bytecode/ImportStarNode.java | 8 +- .../clinic/CodePointConversionNode.java | 4 +- .../nodes/statement/AbstractImportNode.java | 3 +- .../graal/python/runtime/object/PFactory.java | 2 +- .../sequence/storage/BoolSequenceStorage.java | 4 +- .../sequence/storage/ByteSequenceStorage.java | 13 +- .../storage/DoubleSequenceStorage.java | 4 +- .../sequence/storage/IntSequenceStorage.java | 4 +- .../sequence/storage/LongSequenceStorage.java | 4 +- .../storage/ObjectSequenceStorage.java | 10 +- .../graal/python/util/ArrayBuilder.java | 18 +- .../graal/python/util/BufferFormat.java | 12 +- .../graal/python/util/CharsetMapping.java | 3 +- graalpython/lib-python/3/json/decoder.py | 21 +- graalpython/lib-python/3/json/scanner.py | 19 +- 87 files changed, 1598 insertions(+), 960 deletions(-) diff --git a/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/Parser.java b/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/Parser.java index 54341b395d..d7c0846fd0 100644 --- a/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/Parser.java +++ b/graalpython/com.oracle.graal.python.pegparser/src/com/oracle/graal/python/pegparser/Parser.java @@ -23967,7 +23967,7 @@ private boolean genLookahead__tmp_328_rule(boolean match) { return (result != null) == match; } - + @Override protected SSTNode runParser(InputType inputType) { SSTNode result = null; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ArrayModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ArrayModuleBuiltins.java index 43086824f9..91673d035d 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ArrayModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ArrayModuleBuiltins.java @@ -102,7 +102,7 @@ static Object reconstructCached(VirtualFrame frame, Object arrayType, TruffleStr @Exclusive @Cached IsSubtypeNode isSubtypeNode, @Exclusive @Cached ArrayBuiltins.ByteSwapNode byteSwapNode, @Exclusive @Cached TruffleString.CodePointLengthNode lengthNode, - @Exclusive @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Exclusive @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Exclusive @Cached TypeNodes.GetInstanceShape getInstanceShape, @Exclusive @Cached PRaiseNode raiseNode) { BufferFormat format = BufferFormat.forArray(typeCode, lengthNode, atIndexNode); @@ -124,7 +124,7 @@ static Object reconstruct(VirtualFrame frame, Object arrayType, TruffleString ty @Exclusive @Cached IsSubtypeNode isSubtypeNode, @Exclusive @Cached ArrayBuiltins.ByteSwapNode byteSwapNode, @Exclusive @Cached TruffleString.CodePointLengthNode lengthNode, - @Exclusive @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Exclusive @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Exclusive @Cached TypeNodes.GetInstanceShape getInstanceShape, @Exclusive @Cached PRaiseNode raiseNode) { BufferFormat format = BufferFormat.forArray(typeCode, lengthNode, atIndexNode); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BinasciiModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BinasciiModuleBuiltins.java index 17b18b263a..aa7a0cec47 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BinasciiModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BinasciiModuleBuiltins.java @@ -139,8 +139,8 @@ int getBufferLength( @ExportMessage byte readByte(int byteOffset, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { - int ch = codePointAtIndexNode.execute(str, byteOffset, TS_ENCODING); + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { + int ch = codePointAtIndexNode.execute(str, byteOffset); assert 0 <= ch && ch < 128; // guaranteed because str is ASCII return (byte) ch; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java index 4afa9c565f..cef9d694fd 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java @@ -321,6 +321,7 @@ import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(defineModule = J_BUILTINS, isEager = true) public final class BuiltinFunctions extends PythonBuiltins { @@ -587,7 +588,7 @@ interface LongToString { @TruffleBoundary private static TruffleString buildString(boolean isNegative, TruffleString prefix, TruffleString number) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, tsbCapacity(3) + number.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(tsbCapacity(3) + number.byteLength(TS_ENCODING)); if (isNegative) { sb.appendStringUncached(T_MINUS); } @@ -1703,7 +1704,7 @@ static int ord(Object chrObj, @Bind Node inliningTarget, @Cached CastToTruffleStringNode castToStringNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { TruffleString chr; try { @@ -1715,7 +1716,7 @@ static int ord(Object chrObj, if (len != 1) { throw raiseNode.raise(inliningTarget, TypeError, ErrorMessages.EXPECTED_CHARACTER_BUT_STRING_FOUND, "ord()", len); } - return codePointAtIndexNode.execute(chr, 0, TS_ENCODING); + return codePointAtIndexNode.execute(chr, 0); } @Specialization diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SysModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SysModuleBuiltins.java index 3185a01e7d..1ad4b12c52 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SysModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/SysModuleBuiltins.java @@ -1740,7 +1740,7 @@ Object doHook(VirtualFrame frame, Object[] args, PKeyword[] keywords, @Cached BuiltinFunctions.IsInstanceNode isInstanceNode, @Cached WarningsModuleBuiltins.WarnNode warnNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.LastIndexOfCodePointNode lastIndexOfCodePointNode, @Cached TruffleString.SubstringNode substringNode) { TruffleString hookName = OsEnvironGetNode.lookup(frame, boundaryCallData, T_PYTHONBREAKPOINT); @@ -1749,7 +1749,7 @@ Object doHook(VirtualFrame frame, Object[] args, PKeyword[] keywords, } int hookNameLen = codePointLengthNode.execute(hookName, TS_ENCODING); - if (hookNameLen == 1 && codePointAtIndexNode.execute(hookName, 0, TS_ENCODING) == '0') { + if (hookNameLen == 1 && codePointAtIndexNode.execute(hookName, 0) == '0') { // The breakpoint is explicitly no-op'd. return PNone.NONE; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java index 8c44050b37..c0bd0d172f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextBuiltins.java @@ -1165,7 +1165,7 @@ static Object wrap(Object bufferStructPointer, Object ownerObj, long lenObj, @CachedLibrary(limit = "2") InteropLibrary lib, @Cached CastToJavaIntExactNode castToIntNode, @Cached TruffleString.CodePointLengthNode lengthNode, - @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Bind PythonLanguage language) { int ndim = castToIntNode.execute(inliningTarget, ndimObj); int itemsize = castToIntNode.execute(inliningTarget, itemsizeObj); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java index e362ab02d5..ea8e21333d 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/cext/PythonCextUnicodeBuiltins.java @@ -177,6 +177,7 @@ import com.oracle.truffle.api.strings.TruffleString.FromNativePointerNode; import com.oracle.truffle.api.strings.TruffleString.SwitchEncodingNode; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public final class PythonCextUnicodeBuiltins { @@ -720,7 +721,7 @@ static Object join(Object separatorObj, Object itemsObj, long seqlenlong, int seqlen = (int) seqlenlong; assert seqlen == seqlenlong; Object[] items = readNode.readPyObjectArray(itemsObj, seqlen); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); for (int i = 0; i < items.length; i++) { TruffleString item = toTruffleStringNode.execute(inliningTarget, items[i]); if (i != 0) { @@ -769,7 +770,7 @@ static int doGeneric(Object type, long lindex, @Bind Node inliningTarget, @Cached CastToTruffleStringNode castToStringNode, @Cached TruffleString.CodePointLengthNode lengthNode, - @Cached TruffleString.CodePointAtIndexNode codepointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codepointAtIndexNode, @Cached PRaiseNode raiseNode) { try { TruffleString s = castToStringNode.execute(inliningTarget, type); @@ -778,7 +779,7 @@ static int doGeneric(Object type, long lindex, if (index < 0 || index >= lengthNode.execute(s, TS_ENCODING)) { throw raiseNode.raise(inliningTarget, IndexError, ErrorMessages.STRING_INDEX_OUT_OF_RANGE); } - return codepointAtIndexNode.execute(s, index, TS_ENCODING); + return codepointAtIndexNode.execute(s, index); } catch (CannotCastException e) { throw raiseNode.raise(inliningTarget, TypeError, ErrorMessages.BAD_ARG_TYPE_FOR_BUILTIN_OP); } catch (OverflowException e) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/CharmapNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/CharmapNodes.java index 49c5ae2c58..bd6962ace1 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/CharmapNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/CharmapNodes.java @@ -98,8 +98,8 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; -import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public final class CharmapNodes { @@ -118,7 +118,7 @@ public abstract static class PyUnicodeBuildEncodingMapNode extends Node { @Specialization static Object doIt(VirtualFrame frame, Node inliningTarget, TruffleString map, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached(inline = false) HashingStorageSetItem setItemNode, @Cached PRaiseNode raiseNode) { int len = Math.min(codePointLengthNode.execute(map, TS_ENCODING), 256); @@ -132,11 +132,11 @@ static Object doIt(VirtualFrame frame, Node inliningTarget, TruffleString map, Arrays.fill(level1, (byte) 0xFF); Arrays.fill(level2, (byte) 0xFF); - if (codePointAtIndexNode.execute(map, 0, TS_ENCODING, ErrorHandling.BEST_EFFORT) != 0) { + if (codePointAtIndexNode.execute(map, 0) != 0) { return doDict(frame, inliningTarget, map, len, codePointAtIndexNode, setItemNode); } for (int i = 1; i < len; ++i) { - int cp = codePointAtIndexNode.execute(map, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(map, i); if (cp == 0 || cp > 0xFFFF) { return doDict(frame, inliningTarget, map, len, codePointAtIndexNode, setItemNode); } @@ -161,7 +161,7 @@ static Object doIt(VirtualFrame frame, Node inliningTarget, TruffleString map, Arrays.fill(level23, 0, l3Start, (byte) 0xFF); count3 = 0; for (int i = 1; i < len; ++i) { - int cp = codePointAtIndexNode.execute(map, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(map, i); if (cp == 0xFFFE) { continue; } @@ -178,10 +178,11 @@ static Object doIt(VirtualFrame frame, Node inliningTarget, TruffleString map, return PFactory.createEncodingMap(PythonLanguage.get(inliningTarget), count2, count3, level1, level23); } - private static Object doDict(VirtualFrame frame, Node inliningTarget, TruffleString map, int len, TruffleString.CodePointAtIndexNode codePointAtIndexNode, HashingStorageSetItem setItemNode) { + private static Object doDict(VirtualFrame frame, Node inliningTarget, TruffleString map, int len, TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + HashingStorageSetItem setItemNode) { HashingStorage store = PDict.createNewStorage(len); for (int i = 0; i < len; ++i) { - int cp = codePointAtIndexNode.execute(map, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(map, i); store = setItemNode.execute(frame, inliningTarget, store, cp, i); } return PFactory.createDict(PythonLanguage.get(inliningTarget), store); @@ -204,7 +205,7 @@ static byte[] doLatin1(TruffleString src, TruffleString errors, PNone mapping, @Fallback static byte[] doGenericMapping(VirtualFrame frame, Node inliningTarget, TruffleString src, TruffleString errors, Object mapping, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached CharmapEncodeOutputNode charmapEncodeOutputNode, @Cached CharmapEncodingErrorNode charmapEncodingErrorNode) { int len = codePointLengthNode.execute(src, TS_ENCODING); @@ -215,7 +216,7 @@ static byte[] doGenericMapping(VirtualFrame frame, Node inliningTarget, TruffleS int inPos = 0; ErrorHandlerCache cache = new ErrorHandlerCache(); while (inPos < len) { - int cp = codePointAtIndexNode.execute(src, inPos, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, inPos); boolean x = charmapEncodeOutputNode.execute(frame, inliningTarget, cp, mapping, builder); if (!x) { inPos = charmapEncodingErrorNode.execute(frame, inliningTarget, cache, src, inPos, len, errors, mapping, builder); @@ -237,7 +238,7 @@ abstract static class CharmapEncodingErrorNode extends Node { static int doIt(VirtualFrame frame, Node inliningTarget, ErrorHandlerCache cache, TruffleString src, int pos, int len, TruffleString errors, Object mapping, ByteArrayBuilder builder, @Cached CastToTruffleStringNode castToTruffleStringNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached CharmapEncodeLookupNode charmapEncodeLookupNode, @Cached GetErrorHandlerNode getErrorHandlerNode, @Cached CallEncodingErrorHandlerNode callEncodingErrorHandlerNode, @@ -246,7 +247,7 @@ static int doIt(VirtualFrame frame, Node inliningTarget, ErrorHandlerCache cache @Cached RaiseEncodeException raiseEncodeException) { int errEnd = pos; while (errEnd < len) { - int cp = codePointAtIndexNode.execute(src, errEnd, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, errEnd); if (mapping instanceof PEncodingMap map) { if (encodingMapLookup(cp, map) != -1) { break; @@ -270,7 +271,7 @@ static int doIt(VirtualFrame frame, Node inliningTarget, ErrorHandlerCache cache TruffleString replacement = castToTruffleStringNode.execute(inliningTarget, result.replacement); int repLen = codePointLengthNode.execute(replacement, TS_ENCODING); for (int i = 0; i < repLen; ++i) { - int cp = codePointAtIndexNode.execute(replacement, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(replacement, i); if (!charmapEncodeOutputNode.execute(frame, inliningTarget, cp, mapping, builder)) { raiseEncodeException.execute(frame, inliningTarget, cache, T_CHARMAP, src, pos, errEnd, CHARACTER_MAPS_TO_UNDEFINED); } @@ -391,7 +392,7 @@ static TruffleString decodeStringMapping(VirtualFrame frame, Object data, Truffl @SuppressWarnings("unused") @Cached @Exclusive PyUnicodeCheckExactNode isBuiltinString, @Cached @Exclusive CastToTruffleStringNode castToTruffleStringNode, @Cached @Shared TruffleString.CodePointLengthNode codePointLengthNode, - @Cached @Shared TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached @Shared TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached @Shared TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached @Shared TruffleStringBuilder.AppendStringNode appendStringNode, @Cached @Shared TruffleStringBuilder.ToStringNode toStringNode, @@ -405,7 +406,7 @@ static TruffleString decodeStringMapping(VirtualFrame frame, Object data, Truffl int pos = 0; TruffleString mapping = castToTruffleStringNode.execute(inliningTarget, mappingObj); int mappingLen = codePointLengthNode.execute(mapping, TS_ENCODING); - TruffleStringBuilder tsb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 tsb = TruffleStringBuilder.createUTF32(); int errorStartPos; int srcLen; do { @@ -420,7 +421,7 @@ static TruffleString decodeStringMapping(VirtualFrame frame, Object data, Truffl errorStartPos = pos; break; } - int cp = codePointAtIndexNode.execute(mapping, index, TS_ENCODING); + int cp = codePointAtIndexNode.execute(mapping, index); if (cp == UNDEFINED_MAPPING) { errorStartPos = pos; break; @@ -455,7 +456,7 @@ static TruffleString decodeGenericMapping(VirtualFrame frame, Object data, Truff @Cached PyUnicodeCheckNode pyUnicodeCheckNode, @Cached @Exclusive CastToTruffleStringNode castToTruffleStringNode, @Cached @Shared TruffleString.CodePointLengthNode codePointLengthNode, - @Cached @Shared TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached @Shared TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached @Shared TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached @Shared TruffleStringBuilder.AppendStringNode appendStringNode, @Cached @Shared TruffleStringBuilder.ToStringNode toStringNode, @@ -471,7 +472,7 @@ static TruffleString decodeGenericMapping(VirtualFrame frame, Object data, Truff ErrorHandlerCache cache = new ErrorHandlerCache(); Object srcObj = data; int pos = 0; - TruffleStringBuilder tsb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 tsb = TruffleStringBuilder.createUTF32(); int errorStartPos; int srcLen; do { @@ -508,7 +509,7 @@ static TruffleString decodeGenericMapping(VirtualFrame frame, Object data, Truff } else if (strValuesProfile.profile(inliningTarget, pyUnicodeCheckNode.execute(inliningTarget, item))) { TruffleString ts = castToTruffleStringNode.execute(inliningTarget, item); if (codePointLengthNode.execute(ts, TS_ENCODING) == 1) { - int cp = codePointAtIndexNode.execute(ts, 0, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(ts, 0); if (cp == UNDEFINED_MAPPING) { errorStartPos = pos; break; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/ErrorHandlers.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/ErrorHandlers.java index cc14ed4768..62c6d0a47a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/ErrorHandlers.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/codecs/ErrorHandlers.java @@ -113,8 +113,8 @@ import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleString.CodeRange; import com.oracle.truffle.api.strings.TruffleString.Encoding; -import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; /** * Implementation of default error handlers and internal helper nodes for calling error handlers. @@ -363,7 +363,7 @@ static Object doEncode(PBaseException exception, @Cached PyUnicodeEncodeOrTranslateErrorGetObjectNode getObjectNode, @Cached PyUnicodeEncodeOrTranslateErrorGetStartNode getStartNode, @Cached PyUnicodeEncodeOrTranslateErrorGetEndNode getEndNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.FromByteArrayNode fromByteArrayNode, @Cached TruffleString.SwitchEncodingNode switchEncodingNode) { TruffleString src = getObjectNode.execute(inliningTarget, exception); @@ -371,12 +371,12 @@ static Object doEncode(PBaseException exception, int end = getEndNode.execute(inliningTarget, exception); int replacementLength = 0; for (int i = start; i < end; ++i) { - replacementLength += getXmlCharRefReplacementLength(codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT)); + replacementLength += getXmlCharRefReplacementLength(codePointAtIndexNode.execute(src, i)); } byte[] replacement = new byte[replacementLength]; int pos = 0; for (int i = start; i < end; ++i) { - pos = appendXmlCharRefReplacement(replacement, pos, codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT)); + pos = appendXmlCharRefReplacement(replacement, pos, codePointAtIndexNode.execute(src, i)); } TruffleString resultAscii = fromByteArrayNode.execute(replacement, Encoding.US_ASCII, false); return PFactory.createTuple(language, new Object[]{switchEncodingNode.execute(resultAscii, TS_ENCODING), end}); @@ -436,7 +436,7 @@ static Object doEncodeOrTranslateException(PBaseException exception, @Cached PyUnicodeEncodeOrTranslateErrorGetObjectNode getObjectNode, @Cached PyUnicodeEncodeOrTranslateErrorGetStartNode getStartNode, @Cached PyUnicodeEncodeOrTranslateErrorGetEndNode getEndNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached @Shared TruffleString.FromByteArrayNode fromByteArrayNode, @Cached @Shared TruffleString.SwitchEncodingNode switchEncodingNode) { int start = getStartNode.execute(inliningTarget, exception); @@ -447,7 +447,7 @@ static Object doEncodeOrTranslateException(PBaseException exception, } int len = 0; for (int i = start; i < end; ++i) { - int cp = codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, i); if (cp >= 0x10000) { len += 1 + 1 + 8; // \\UNNNNNNNN } else if (cp >= 0x100) { @@ -459,7 +459,7 @@ static Object doEncodeOrTranslateException(PBaseException exception, byte[] replacement = new byte[len]; int pos = 0; for (int i = start; i < end; i++) { - int cp = codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, i); pos = BytesUtils.unicodeNonAsciiEscape(cp, pos, replacement, true); } TruffleString resultAscii = fromByteArrayNode.execute(replacement, Encoding.US_ASCII, false); @@ -486,7 +486,7 @@ static Object doEncode(PBaseException exception, @Cached PyUnicodeEncodeOrTranslateErrorGetObjectNode getObjectNode, @Cached PyUnicodeEncodeOrTranslateErrorGetStartNode getStartNode, @Cached PyUnicodeEncodeOrTranslateErrorGetEndNode getEndNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.FromByteArrayNode fromByteArrayNode, @Cached TruffleString.SwitchEncodingNode switchEncodingNode, @Cached TruffleString.FromJavaStringNode fromJavaStringNode, @@ -499,10 +499,10 @@ static Object doEncode(PBaseException exception, if (start >= end) { return PFactory.createTuple(language, new Object[]{T_EMPTY_STRING, start}); } - TruffleStringBuilder tsb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 tsb = TruffleStringBuilder.createUTF32(); byte[] buf = new byte[1 + 1 + 8]; // \UNNNNNNNN for (int i = start; i < end; ++i) { - int cp = codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, i); String name = getUnicodeName(cp); if (name != null) { appendCodePointNode.execute(tsb, '\\'); @@ -540,7 +540,7 @@ static Object doEncode(PBaseException exception, @Cached PyUnicodeEncodeOrTranslateErrorGetEndNode getEndNode, @Cached PyUnicodeEncodeErrorGetEncodingNode getEncodingNode, @Exclusive @Cached GetStandardEncodingNode getStandardEncodingNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { int start = getStartNode.execute(inliningTarget, exception); int end = getEndNode.execute(inliningTarget, exception); @@ -556,7 +556,7 @@ static Object doEncode(PBaseException exception, byte[] result = new byte[encoding.byteLength * (end - start)]; int pos = 0; for (int i = start; i < end; ++i) { - int cp = codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, i); if (!isSurrogate(cp)) { throw raiseNode.raiseExceptionObject(inliningTarget, exception); } @@ -674,7 +674,7 @@ static Object doEncode(PBaseException exception, @Cached PyUnicodeEncodeOrTranslateErrorGetObjectNode getObjectNode, @Cached PyUnicodeEncodeOrTranslateErrorGetStartNode getStartNode, @Cached PyUnicodeEncodeOrTranslateErrorGetEndNode getEndNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { int start = getStartNode.execute(inliningTarget, exception); int end = getEndNode.execute(inliningTarget, exception); @@ -685,7 +685,7 @@ static Object doEncode(PBaseException exception, byte[] result = new byte[end - start]; int pos = 0; for (int i = start; i < end; ++i) { - int cp = codePointAtIndexNode.execute(src, i, TS_ENCODING, ErrorHandling.BEST_EFFORT); + int cp = codePointAtIndexNode.execute(src, i); if (cp < 0xdc80 || cp > 0xdcff) { throw raiseNode.raiseExceptionObject(inliningTarget, exception); } @@ -712,7 +712,7 @@ static Object doDecode(VirtualFrame frame, PBaseException exception, int end = getEndNode.execute(inliningTarget, exception); Object object = getObjectNode.execute(inliningTarget, exception); Object srcBuf = acquireLib.acquireReadonly(object, frame, callData); - TruffleStringBuilder tsb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 tsb = TruffleStringBuilder.createUTF32(); try { byte[] src = accessLib.getInternalOrCopiedByteArray(srcBuf); int consumed = 0; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVDialectBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVDialectBuiltins.java index fe22257eb4..b73414cc67 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVDialectBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVDialectBuiltins.java @@ -331,11 +331,11 @@ private static Object createCSVDialect(Node raisingNode, Object cls, TruffleStri } // delimiter cannot be NOT_SET - int delimiterCodePoint = TruffleString.CodePointAtIndexNode.getUncached().execute(delimiter, 0, TS_ENCODING); + int delimiterCodePoint = TruffleString.CodePointAtIndexUTF32Node.getUncached().execute(delimiter, 0); int escapeCharCodePoint = TruffleString.EqualNode.getUncached().execute(escapeChar, T_NOT_SET, TS_ENCODING) ? NOT_SET_CODEPOINT - : TruffleString.CodePointAtIndexNode.getUncached().execute(escapeChar, 0, TS_ENCODING); + : TruffleString.CodePointAtIndexUTF32Node.getUncached().execute(escapeChar, 0); int quoteCharCodePoint = TruffleString.EqualNode.getUncached().execute(quoteChar, T_NOT_SET, TS_ENCODING) ? NOT_SET_CODEPOINT - : TruffleString.CodePointAtIndexNode.getUncached().execute(quoteChar, 0, TS_ENCODING); + : TruffleString.CodePointAtIndexUTF32Node.getUncached().execute(quoteChar, 0); return PFactory.createCSVDialect(cls, TypeNodes.GetInstanceShape.executeUncached(cls), delimiter, delimiterCodePoint, doubleQuote, escapeChar, escapeCharCodePoint, lineTerminator, quoteChar, quoteCharCodePoint, quoting, diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReader.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReader.java index 72dd68076d..70b373076b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReader.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReader.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -41,11 +41,11 @@ package com.oracle.graal.python.builtins.modules.csv; import static com.oracle.graal.python.builtins.modules.csv.CSVReader.ReaderState.START_RECORD; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject; import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public final class CSVReader extends PythonBuiltinObject { @@ -64,7 +64,7 @@ enum ReaderState { final Object inputIter; /* iterate over this for input lines */ final CSVDialect dialect; /* parsing dialect */ ReaderState state; /* current CSV parse state */ - TruffleStringBuilder field; /* temporary buffer */ + TruffleStringBuilderUTF32 field; /* temporary buffer */ boolean numericField; /* treat field as numeric */ int lineNum; /* Source-file line number */ long fieldLimit; /* Cached copy of CSVModuleBuiltins.fieldLimit */ @@ -77,7 +77,7 @@ public CSVReader(Object cls, Shape instanceShape, Object inputIter, CSVDialect d } void parseReset() { - this.field = TruffleStringBuilder.create(TS_ENCODING); + this.field = TruffleStringBuilder.createUTF32(); this.state = START_RECORD; this.numericField = false; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReaderBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReaderBuiltins.java index 8948b8fb0b..6bed4ce6a7 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReaderBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVReaderBuiltins.java @@ -335,7 +335,7 @@ private static void parseProcessCodePoint(Node inliningTarget, CSVReader self, P private static void parseSaveField(Node inliningTarget, CSVReader self, PList fields, ToStringNode toStringNode, PyNumberFloatNode pyNumberFloatNode, AppendNode appendNode) { TruffleString field = toStringNode.execute(self.field); - self.field = TruffleStringBuilder.create(TS_ENCODING); + self.field = TruffleStringBuilder.createUTF32(); if (self.numericField) { self.numericField = false; appendNode.execute(fields, pyNumberFloatNode.execute(inliningTarget, field)); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVWriterBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVWriterBuiltins.java index c9012fb4d4..b63eaace9c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVWriterBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/csv/CSVWriterBuiltins.java @@ -79,6 +79,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; @CoreFunctions(extendClasses = PythonBuiltinClassType.CSVWriter) @@ -118,7 +119,7 @@ static Object doIt(VirtualFrame frame, CSVWriter self, Object seq, } // Join all fields of passed in sequence in internal buffer. - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); CSVDialect dialect = self.dialect; boolean first = true; boolean nullField = false; @@ -179,7 +180,7 @@ static Object doIt(VirtualFrame frame, CSVWriter self, Object seq, return callNode.executeObject(frame, self.write, toStringNode.execute(sb)); } - static void joinAppend(Node inliningTarget, TruffleStringBuilder sb, CSVWriter self, TruffleString field, boolean quotedArg, + static void joinAppend(Node inliningTarget, TruffleStringBuilderUTF32 sb, CSVWriter self, TruffleString field, boolean quotedArg, PRaiseNode raiseNode, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleString.CodePointLengthNode codePointLengthNode, @@ -211,12 +212,12 @@ static void joinAppend(Node inliningTarget, TruffleStringBuilder sb, CSVWriter s @GenerateInline(false) // 36 -> 17 protected abstract static class JoinAppendData extends Node { - abstract boolean execute(Node inliningTarget, TruffleStringBuilder sb, CSVDialect dialect, TruffleString field, boolean quoted, boolean copyPhase, + abstract boolean execute(Node inliningTarget, TruffleStringBuilderUTF32 sb, CSVDialect dialect, TruffleString field, boolean quoted, boolean copyPhase, PRaiseNode raiseNode, TruffleStringBuilder.AppendStringNode appendStringNode); @Specialization - static boolean joinAppendData(Node inliningTarget, TruffleStringBuilder sb, CSVDialect dialect, TruffleString field, boolean quotedArg, boolean isCopyPhase, + static boolean joinAppendData(Node inliningTarget, TruffleStringBuilderUTF32 sb, CSVDialect dialect, TruffleString field, boolean quotedArg, boolean isCopyPhase, PRaiseNode raiseNode, TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode, @@ -278,14 +279,14 @@ static boolean joinAppendData(Node inliningTarget, TruffleStringBuilder sb, CSVD return quoted; } - static void addChar(TruffleStringBuilder sb, TruffleString c, boolean isCopyPhase, + static void addChar(TruffleStringBuilderUTF32 sb, TruffleString c, boolean isCopyPhase, TruffleStringBuilder.AppendStringNode appendStringNode) { if (isCopyPhase) { appendStringNode.execute(sb, c); } } - static void addChar(TruffleStringBuilder sb, int c, boolean isCopyPhase, + static void addChar(TruffleStringBuilderUTF32 sb, int c, boolean isCopyPhase, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { if (isCopyPhase) { appendCodePointNode.execute(sb, c, 1, true); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/PartialBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/PartialBuiltins.java index 562715bf3e..4d223f6cc3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/PartialBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/PartialBuiltins.java @@ -53,15 +53,14 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_EQ; import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import java.util.List; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.annotations.Slot; import com.oracle.graal.python.annotations.Slot.SlotKind; import com.oracle.graal.python.annotations.Slot.SlotSignature; -import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.PythonBuiltins; @@ -123,6 +122,7 @@ import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; import com.oracle.truffle.api.strings.TruffleStringBuilder.AppendStringNode; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PPartial) public final class PartialBuiltins extends PythonBuiltins { @@ -483,7 +483,7 @@ static Object callWDictWKw(VirtualFrame frame, PPartial self, Object[] args, PKe @Slot(value = SlotKind.tp_repr, isComplex = true) @GenerateNodeFactory abstract static class PartialReprNode extends PythonUnaryBuiltinNode { - private static void reprArgs(VirtualFrame frame, Node inliningTarget, PPartial partial, TruffleStringBuilder sb, PyObjectReprAsTruffleStringNode reprNode, + private static void reprArgs(VirtualFrame frame, Node inliningTarget, PPartial partial, TruffleStringBuilderUTF32 sb, PyObjectReprAsTruffleStringNode reprNode, TruffleStringBuilder.AppendStringNode appendStringNode) { for (Object arg : partial.getArgs()) { appendStringNode.execute(sb, T_COMMA_SPACE); @@ -491,7 +491,7 @@ private static void reprArgs(VirtualFrame frame, Node inliningTarget, PPartial p } } - private static void reprKwArgs(VirtualFrame frame, Node inliningTarget, PPartial partial, TruffleStringBuilder sb, PyObjectReprAsTruffleStringNode reprNode, + private static void reprKwArgs(VirtualFrame frame, Node inliningTarget, PPartial partial, TruffleStringBuilderUTF32 sb, PyObjectReprAsTruffleStringNode reprNode, PyObjectStrAsTruffleStringNode strNode, HashingStorageGetIterator getHashingStorageIterator, HashingStorageIteratorNext hashingStorageIteratorNext, HashingStorageIteratorKey hashingStorageIteratorKey, HashingStorageGetItem getItem, AppendStringNode appendStringNode) { final PDict kwDict = partial.getKw(); @@ -530,7 +530,7 @@ public static TruffleString repr(VirtualFrame frame, PPartial partial, return T_ELLIPSIS; } try { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, name); appendStringNode.execute(sb, T_LPAREN); appendStringNode.execute(sb, reprNode.execute(frame, inliningTarget, partial.getFn())); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/IncrementalNewlineDecoderBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/IncrementalNewlineDecoderBuiltins.java index a1994d0c23..7231427974 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/IncrementalNewlineDecoderBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/IncrementalNewlineDecoderBuiltins.java @@ -63,10 +63,10 @@ import com.oracle.graal.python.PythonLanguage; import com.oracle.graal.python.annotations.ArgumentClinic; +import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.annotations.Slot; import com.oracle.graal.python.annotations.Slot.SlotKind; import com.oracle.graal.python.annotations.Slot.SlotSignature; -import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.PythonBuiltins; import com.oracle.graal.python.builtins.objects.PNone; @@ -101,6 +101,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PIncrementalNewlineDecoder) public final class IncrementalNewlineDecoderBuiltins extends PythonBuiltins { @@ -175,7 +176,7 @@ static TruffleString noDecoder(VirtualFrame frame, PNLDecoder self, Object input @Cached InlinedConditionProfile len0Profile, @Cached CastToTruffleStringNode toString, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Cached TruffleString.SubstringNode substringNode, @Cached TruffleString.ConcatNode concatNode, @@ -202,7 +203,7 @@ static TruffleString noDecoder(VirtualFrame frame, PNLDecoder self, Object input * one pass */ if (!isFinal) { - if (outputLen > 0 && codePointAtIndexNode.execute(output, outputLen - 1, TS_ENCODING) == '\r') { + if (outputLen > 0 && codePointAtIndexNode.execute(output, outputLen - 1) == '\r') { output = substringNode.execute(output, 0, outputLen - 1, TS_ENCODING, false); self.setPendingCR(true); } @@ -247,15 +248,15 @@ static TruffleString noDecoder(VirtualFrame frame, PNLDecoder self, Object input int i = 0; while (i < len && seenNewline != SEEN_ALL) { - while (i < len && codePointAtIndexNode.execute(output, i, TS_ENCODING) > '\r') { + while (i < len && codePointAtIndexNode.execute(output, i) > '\r') { i++; } - int c = i < len ? codePointAtIndexNode.execute(output, i++, TS_ENCODING) : '\0'; + int c = i < len ? codePointAtIndexNode.execute(output, i++) : '\0'; if (c == '\n') { seenNewline |= SEEN_LF; } else if (c == '\r') { assert i < len || isFinal; - if (i < len && codePointAtIndexNode.execute(output, i, TS_ENCODING) == '\n') { + if (i < len && codePointAtIndexNode.execute(output, i) == '\n') { seenNewline |= SEEN_CRLF; i++; } else { @@ -264,11 +265,11 @@ static TruffleString noDecoder(VirtualFrame frame, PNLDecoder self, Object input } } } else { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, output.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(output.byteLength(TS_ENCODING)); int in = 0; while (true) { int c = '\0'; - while (in < len && (c = codePointAtIndexNode.execute(output, in++, TS_ENCODING)) > '\r') { + while (in < len && (c = codePointAtIndexNode.execute(output, in++)) > '\r') { appendCodePointNode.execute(sb, c, 1, true); } if (c == '\n') { @@ -277,7 +278,7 @@ static TruffleString noDecoder(VirtualFrame frame, PNLDecoder self, Object input continue; } if (c == '\r') { - if (in < len && codePointAtIndexNode.execute(output, in, TS_ENCODING) == '\n') { + if (in < len && codePointAtIndexNode.execute(output, in) == '\n') { in++; seenNewline |= SEEN_CRLF; } else { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/PStringIO.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/PStringIO.java index ea23018729..c07c324cee 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/PStringIO.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/PStringIO.java @@ -40,8 +40,6 @@ */ package com.oracle.graal.python.builtins.modules.io; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; - import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.GenerateCached; import com.oracle.truffle.api.dsl.GenerateInline; @@ -50,6 +48,7 @@ import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public final class PStringIO extends PTextIOBase { @@ -63,8 +62,8 @@ public final class PStringIO extends PTextIOBase { private boolean closed; private TruffleString cachedString; - private TruffleStringBuilder buf; - private TruffleStringBuilder sb; + private TruffleStringBuilderUTF32 buf; + private TruffleStringBuilderUTF32 sb; private int pos; private int stringSize; @@ -73,11 +72,11 @@ public PStringIO(Object cls, Shape instanceShape) { buf = null; } - public TruffleStringBuilder getBuf() { + public TruffleStringBuilderUTF32 getBuf() { return buf; } - public void setBuf(TruffleStringBuilder buf) { + public void setBuf(TruffleStringBuilderUTF32 buf) { this.buf = buf; cachedString = null; } @@ -129,7 +128,7 @@ public boolean isAccumulating() { public void setAccumulating() { assert stringSize == 0 && !isAccumulating(); - sb = TruffleStringBuilder.create(TS_ENCODING); + sb = TruffleStringBuilder.createUTF32(); cachedString = null; } @@ -140,7 +139,7 @@ public void append(TruffleString str, TruffleStringBuilder.AppendStringNode appe public void setRealized() { sb = null; - buf = TruffleStringBuilder.create(TS_ENCODING); + buf = TruffleStringBuilder.createUTF32(); } public TruffleString makeIntermediate(TruffleStringBuilder.ToStringNode toStringNode) { @@ -151,7 +150,7 @@ public TruffleString makeIntermediate(TruffleStringBuilder.ToStringNode toString @Override public void clearAll() { super.clearAll(); - buf = TruffleStringBuilder.create(TS_ENCODING); + buf = TruffleStringBuilder.createUTF32(); sb = null; cachedString = null; setWriteNewline(null); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/StringIOBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/StringIOBuiltins.java index b5514155b1..b1c2a16f1b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/StringIOBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/StringIOBuiltins.java @@ -135,6 +135,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PStringIO) public final class StringIOBuiltins extends PythonBuiltins { @@ -225,7 +226,7 @@ static void writeString(VirtualFrame frame, Node inliningTarget, PStringIO self, self.realize(); } - TruffleStringBuilder sb = self.getBuf(); + TruffleStringBuilderUTF32 sb = self.getBuf(); self.invalidateBufCache(); if (self.getPos() > self.getStringSize()) { /* @@ -244,7 +245,7 @@ static void writeString(VirtualFrame frame, Node inliningTarget, PStringIO self, // Without that API, we have to create a new builder and replace the old one with it TruffleString currentBuf = toStringNode.execute(sb, true); TruffleString left = substringNode.execute(currentBuf, 0, self.getPos(), TS_ENCODING, true); - sb = TruffleStringBuilder.create(TS_ENCODING, self.getPos() + decodedLen); + sb = TruffleStringBuilder.createUTF32(self.getPos() + decodedLen); self.setBuf(sb); appendStringNode.execute(sb, left); appendStringNode.execute(sb, decoded); @@ -295,7 +296,7 @@ static PNone init(VirtualFrame frame, PStringIO self, TruffleString initialValue @Cached IncrementalNewlineDecoderBuiltins.InitNode initNode, @Cached StringReplaceNode replaceNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.SubstringNode substringNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode, @@ -322,7 +323,7 @@ static PNone init(VirtualFrame frame, PStringIO self, TruffleString initialValue if (newline != null) { self.setReadNewline(newline); } - self.setReadUniversal(newline == null || newline.isEmpty() || codePointAtIndexNode.execute(newline, 0, TS_ENCODING) == '\0'); + self.setReadUniversal(newline == null || newline.isEmpty() || codePointAtIndexNode.execute(newline, 0) == '\0'); self.setReadTranslate(newline == null); /*- If newline == "", we don't translate anything. @@ -330,7 +331,7 @@ static PNone init(VirtualFrame frame, PStringIO self, TruffleString initialValue (for newline == None, TextIOWrapper translates to os.linesep, but it is pointless for StringIO) */ - if (newline != null && !newline.isEmpty() && codePointAtIndexNode.execute(newline, 0, TS_ENCODING) == '\r') { + if (newline != null && !newline.isEmpty() && codePointAtIndexNode.execute(newline, 0) == '\r') { self.setWriteNewline(self.getReadNewline()); } @@ -476,7 +477,7 @@ static Object truncate(PStringIO self, int size, @Shared @Cached TruffleStringBuilder.AppendStringNode appendStringNode) { self.realize(); TruffleString currentBuf = toStringNode.execute(self.getBuf(), true); - TruffleStringBuilder newBuf = TruffleStringBuilder.create(TS_ENCODING, size); + TruffleStringBuilderUTF32 newBuf = TruffleStringBuilder.createUTF32(size); appendStringNode.execute(newBuf, substringNode.execute(currentBuf, 0, size, TS_ENCODING, true)); self.setBuf(newBuf); self.setStringsize(size); @@ -690,7 +691,7 @@ static Object doit(VirtualFrame frame, PStringIO self, PTuple state, TruffleString buf = toString.execute(inliningTarget, array[0]); int bufsize = codePointLengthNode.execute(buf, TS_ENCODING); self.setRealized(); - TruffleStringBuilder newBuf = TruffleStringBuilder.create(TS_ENCODING, bufsize); + TruffleStringBuilderUTF32 newBuf = TruffleStringBuilder.createUTF32(bufsize); appendStringNode.execute(newBuf, buf); self.setBuf(newBuf); self.setStringsize(bufsize); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperBuiltins.java index a63294e45b..f4cdabfe1d 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperBuiltins.java @@ -128,10 +128,10 @@ import com.oracle.graal.python.PythonLanguage; import com.oracle.graal.python.annotations.ArgumentClinic; +import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.annotations.Slot; import com.oracle.graal.python.annotations.Slot.SlotKind; import com.oracle.graal.python.annotations.Slot.SlotSignature; -import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.PythonBuiltins; import com.oracle.graal.python.builtins.modules.io.TextIOWrapperNodes.WriteFlushNode; @@ -188,6 +188,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PTextIOWrapper) public final class TextIOWrapperBuiltins extends PythonBuiltins { @@ -339,7 +340,7 @@ static Object reconfigure(VirtualFrame frame, PTextIO self, Object encodingObj, @Cached PyObjectCallMethodObjArgs callMethod, @Cached PyObjectIsTrueNode isTrueNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.EqualNode equalNode, @Cached TextIOWrapperNodes.ChangeEncodingNode changeEncodingNode) { TruffleString newline = null; @@ -512,7 +513,7 @@ static TruffleString read(VirtualFrame frame, PTextIO self, int n, writeFlushNode.execute(frame, inliningTarget, self); TruffleString result = self.consumeDecodedChars(n, substringNode, false); int remaining = n - codePointLengthNode.execute(result, TS_ENCODING); - TruffleStringBuilder chunks = null; + TruffleStringBuilderUTF32 chunks = null; /* Keep reading chunks until we have n characters to return */ while (remaining > 0) { boolean res = readChunkNode.execute(frame, inliningTarget, self, remaining); @@ -522,7 +523,7 @@ static TruffleString read(VirtualFrame frame, PTextIO self, int n, } if (!result.isEmpty()) { if (chunks == null) { - chunks = TruffleStringBuilder.create(TS_ENCODING); + chunks = TruffleStringBuilder.createUTF32(); } appendStringNode.execute(chunks, result); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperNodes.java index d06f0dd848..5eb40a3447 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/io/TextIOWrapperNodes.java @@ -118,19 +118,20 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public abstract class TextIOWrapperNodes { public static final TruffleString T_CODECS_OPEN = tsLiteral("codecs.open()"); protected static void validateNewline(TruffleString str, Node inliningTarget, PRaiseNode raise, TruffleString.CodePointLengthNode codePointLengthNode, - TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { int len = codePointLengthNode.execute(str, TS_ENCODING); - int c = len == 0 ? '\0' : codePointAtIndexNode.execute(str, 0, TS_ENCODING); + int c = len == 0 ? '\0' : codePointAtIndexNode.execute(str, 0); if (c != '\0' && !(c == '\n' && len == 1) && !(c == '\r' && len == 1) && - !(c == '\r' && len == 2 && codePointAtIndexNode.execute(str, 1, TS_ENCODING) == '\n')) { + !(c == '\r' && len == 2 && codePointAtIndexNode.execute(str, 1) == '\n')) { throw raise.raise(inliningTarget, ValueError, ILLEGAL_NEWLINE_VALUE_S, str); } } @@ -329,12 +330,12 @@ static int doNonUniversal(@SuppressWarnings("unused") PTextIOBase self, TruffleS @Shared @Cached TruffleString.CodePointLengthNode codePointLengthNode, @Cached TruffleString.IndexOfStringNode indexOfStringNode, @Shared @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { int len = codePointLengthNode.execute(line, TS_ENCODING); TruffleString readNl = self.getReadNewline(); int nlLen = codePointLengthNode.execute(readNl, TS_ENCODING); if (nlLen == 1) { - int cp = codePointAtIndexNode.execute(readNl, 0, TS_ENCODING); + int cp = codePointAtIndexNode.execute(readNl, 0); int pos = indexOfCodePointNode.execute(line, cp, start, len, TS_ENCODING); if (pos >= 0) { return pos - start + 1; @@ -346,7 +347,7 @@ static int doNonUniversal(@SuppressWarnings("unused") PTextIOBase self, TruffleS if (pos >= 0) { return pos - start + nlLen; } - int firstCp = codePointAtIndexNode.execute(readNl, 0, TS_ENCODING); + int firstCp = codePointAtIndexNode.execute(readNl, 0); int i = len - (nlLen - 1); if (i < start) { i = start; @@ -382,7 +383,7 @@ static TruffleString readline(VirtualFrame frame, PTextIO self, int limit, int chunked = 0; int start, endpos, offsetToBuffer; TruffleString line = null; - TruffleStringBuilder chunks = null; + TruffleStringBuilderUTF32 chunks = null; TruffleString remaining = null; int[] consumed = new int[1]; @@ -446,7 +447,7 @@ static TruffleString readline(VirtualFrame frame, PTextIO self, int limit, if (endpos > start) { /* No line ending seen yet - put aside current data */ if (chunks == null) { - chunks = TruffleStringBuilder.create(TS_ENCODING); + chunks = TruffleStringBuilder.createUTF32(); } TruffleString s = substringNode.execute(line, start, endpos - start, TS_ENCODING, true); appendStringNode.execute(chunks, s); @@ -474,7 +475,7 @@ static TruffleString readline(VirtualFrame frame, PTextIO self, int limit, } if (remaining != null) { if (chunks == null) { - chunks = TruffleStringBuilder.create(TS_ENCODING); + chunks = TruffleStringBuilder.createUTF32(); } appendStringNode.execute(chunks, remaining); } @@ -817,7 +818,7 @@ static void init(VirtualFrame frame, Node inliningTarget, PTextIO self, Object b @Cached PyObjectLookupAttr lookup, @Cached PyObjectIsTrueNode isTrueNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Cached TruffleString.EqualNode equalNode, @Cached(inline = false) WarningsModuleBuiltins.WarnNode warnNode, diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONEncoderBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONEncoderBuiltins.java index b361d3baec..06c872c35f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONEncoderBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONEncoderBuiltins.java @@ -7,15 +7,16 @@ import static com.oracle.graal.python.builtins.PythonBuiltinClassType.TypeError; import static com.oracle.graal.python.builtins.PythonBuiltinClassType.ValueError; +import static com.oracle.graal.python.builtins.modules.json.JSONScannerBuiltins.RECURSION_LIMIT; +import static com.oracle.graal.python.builtins.objects.cext.structs.CFields.PyFloatObject__ob_fval; import static com.oracle.graal.python.nodes.PGuards.isDouble; import static com.oracle.graal.python.nodes.PGuards.isInteger; import static com.oracle.graal.python.nodes.PGuards.isPFloat; import static com.oracle.graal.python.nodes.PGuards.isPInt; import static com.oracle.graal.python.nodes.PGuards.isString; -import static com.oracle.graal.python.nodes.truffle.TruffleStringMigrationHelpers.isJavaString; import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; -import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; +import static com.oracle.truffle.api.CompilerDirectives.UNLIKELY_PROBABILITY; import static com.oracle.truffle.api.CompilerDirectives.castExact; import java.util.List; @@ -30,13 +31,14 @@ import com.oracle.graal.python.builtins.PythonBuiltins; import com.oracle.graal.python.builtins.modules.json.JSONEncoderBuiltinsClinicProviders.MakeEncoderClinicProviderGen; import com.oracle.graal.python.builtins.objects.PNone; +import com.oracle.graal.python.builtins.objects.cext.PythonAbstractNativeObject; +import com.oracle.graal.python.builtins.objects.cext.structs.CStructAccess; import com.oracle.graal.python.builtins.objects.common.HashingStorage; import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageGetIterator; import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageIterator; import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageIteratorKey; import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageIteratorNext; import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageIteratorValue; -import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageLen; import com.oracle.graal.python.builtins.objects.common.SequenceStorageNodes; import com.oracle.graal.python.builtins.objects.dict.PDict; import com.oracle.graal.python.builtins.objects.floats.FloatBuiltins; @@ -64,25 +66,32 @@ import com.oracle.graal.python.nodes.builtins.ListNodes.ConstructListNode; import com.oracle.graal.python.nodes.call.special.CallUnaryMethodNode; import com.oracle.graal.python.nodes.call.special.LookupAndCallUnaryNode; +import com.oracle.graal.python.nodes.classes.IsSubtypeNode; import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode; import com.oracle.graal.python.nodes.function.builtins.PythonClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.PythonTernaryClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.clinic.ArgumentClinicProvider; +import com.oracle.graal.python.nodes.object.GetClassNode; import com.oracle.graal.python.nodes.util.CastToTruffleStringNode; -import com.oracle.graal.python.runtime.ExecutionContext.BoundaryCallContext; -import com.oracle.graal.python.runtime.IndirectCallData.BoundaryCallData; import com.oracle.graal.python.runtime.formatting.FloatFormatter; import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.graal.python.runtime.sequence.PSequence; import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage; +import com.oracle.graal.python.util.ArrayBuilder; import com.oracle.graal.python.util.PythonUtils; +import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.dsl.GenerateInline; import com.oracle.truffle.api.dsl.GenerateNodeFactory; import com.oracle.truffle.api.dsl.NodeFactory; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.profiles.InlinedBranchProfile; +import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @@ -96,8 +105,13 @@ public final class JSONEncoderBuiltins extends PythonBuiltins { private static final TruffleString T_POSITIVE_INFINITY = tsLiteral("Infinity"); private static final TruffleString T_NEGATIVE_INFINITY = tsLiteral("-Infinity"); private static final TruffleString T_NAN = tsLiteral("NaN"); - private static final TruffleString T_BRACES = tsLiteral("{}"); - private static final TruffleString T_BRACKETS = tsLiteral("[]"); + + private static final byte STATE_INITIAL = 0; + private static final byte STATE_BUILTIN_LIST = 1; + private static final byte STATE_BUILTIN_DICT = 2; + private static final byte STATE_GENERIC_LIST = 3; + private static final byte STATE_GENERIC_DICT = 4; + private static final byte STATE_DEFAULT_FN = 5; public static final TpSlots SLOTS = JSONEncoderBuiltinsSlotsGen.SLOTS; @@ -166,253 +180,396 @@ protected ArgumentClinicProvider getArgumentClinic() { } @Specialization - protected PTuple call(VirtualFrame frame, PJSONEncoder self, Object obj, @SuppressWarnings("unused") int indent, + PTuple call(VirtualFrame frame, PJSONEncoder self, Object obj, @SuppressWarnings("unused") int indent, + @Bind Node inliningTarget, @Bind PythonLanguage language, - @Cached("createFor($node)") BoundaryCallData boundaryCallData) { - return PFactory.createTuple(language, new Object[]{jsonEncode(frame, boundaryCallData, self, obj)}); - } - - private TruffleString jsonEncode(VirtualFrame frame, BoundaryCallData boundaryCallData, PJSONEncoder encoder, Object obj) { - Object saved = BoundaryCallContext.enter(frame, boundaryCallData); - try { - return jsonEncodeBoundary(encoder, obj); - } finally { - BoundaryCallContext.exit(frame, boundaryCallData, saved); - } - } - - @TruffleBoundary - private TruffleString jsonEncodeBoundary(PJSONEncoder encoder, Object obj) { + @Cached InlinedBranchProfile genericListProfile, + @Cached InlinedBranchProfile genericDictProfile, + @Cached InlinedBranchProfile errorProfile, + @Cached GetClassNode getClassNode, + @Cached IsSubtypeNode isSubtypeNode, + @Cached PyTupleCheckExactNode pyTupleCheckExactNode, + @Cached PyListCheckExactNode pyListCheckExactNode, + @Cached ConstructListNode constructListNode, + @Cached PyIterNextNode pyIterNextNode, + @Cached CallUnaryMethodNode callDefaultFn, + @Cached SequenceStorageNodes.GetItemScalarNode getItemScalarNode, + @Cached SequenceStorageNodes.GetItemScalarNode getItemScalarCustomNode, + @Cached HashingStorageGetIterator hashingStorageGetIterator, + @Cached HashingStorageIteratorNext hashingStorageIteratorNext, + @Cached HashingStorageIteratorKey hashingStorageIteratorKey, + @Cached HashingStorageIteratorValue hashingStorageIteratorValue, + @Cached AppendSimpleObjectNode appendSimpleObjectNode, + @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + @Cached TruffleStringBuilder.AppendStringNode appendStringNode, + @Cached TruffleStringBuilder.ToStringNode toStringNode) { TruffleStringBuilderUTF32 builder = PythonUtils.createStringBuilder(); - appendListObj(encoder, builder, obj); - return TruffleStringBuilder.ToStringNode.getUncached().execute(builder); - } - - private void appendConst(TruffleStringBuilderUTF32 builder, Object obj) { - if (obj == PNone.NONE) { - builder.appendStringUncached(T_NULL); - } else if (obj == Boolean.TRUE) { - builder.appendStringUncached(T_TRUE); - } else { - assert obj == Boolean.FALSE; - builder.appendStringUncached(T_FALSE); - } - } - - private void appendFloat(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, double obj) { - if (!Double.isFinite(obj)) { - if (!encoder.allowNan) { - throw PRaiseNode.raiseStatic(this, ValueError, ErrorMessages.OUT_OF_RANGE_FLOAT_NOT_JSON_COMPLIANT, PyObjectReprAsTruffleStringNode.executeUncached(obj)); + ArrayBuilder stack = new ArrayBuilder<>(8); + Object key = null; + Object value = obj; + byte state = STATE_INITIAL; + boolean first = true; + Object parent = null; + SequenceStorage builtinListStorage = null; + HashingStorage builtinDictStorage = null; + HashingStorageIterator builtinDictIterator = null; + Object genericIterator = null; + boolean checkCircles = self.markers != PNone.NONE; + PJSONEncoder.FastEncode fastEncode = self.fastEncode; + outer: while (true) { + boolean skip = false; + if (state != STATE_INITIAL && state != STATE_DEFAULT_FN && !first) { + appendStringNode.execute(builder, self.itemSeparator); } - if (obj > 0) { - builder.appendStringUncached(T_POSITIVE_INFINITY); - } else if (obj < 0) { - builder.appendStringUncached(T_NEGATIVE_INFINITY); - } else { - builder.appendStringUncached(T_NAN); + if (state == STATE_BUILTIN_DICT || state == STATE_GENERIC_DICT) { + boolean isString = isString(key); + if (isString || isSimpleObj(key, inliningTarget, getClassNode, isSubtypeNode)) { + if (!isString) { + appendCodePointNode.execute(builder, '"'); + } + appendSimpleObjectNode.execute(frame, self, fastEncode, builder, key); + if (!isString) { + appendCodePointNode.execute(builder, '"'); + } + } else { + if (self.skipKeys) { + skip = true; + } else { + errorProfile.enter(inliningTarget); + throw PRaiseNode.raiseStatic(this, TypeError, ErrorMessages.KEYS_MUST_BE_STR_INT___NOT_P, key); + } + } + if (!skip) { + appendStringNode.execute(builder, self.keySeparator); + } } - } else { - builder.appendStringUncached(formatDouble(obj)); - } - } - - private TruffleString formatDouble(double obj) { - FloatFormatter f = new FloatFormatter(FloatBuiltins.StrNode.spec, this); - f.setMinFracDigits(1); - return FloatBuiltins.StrNode.doFormat(obj, f); - } - - private void appendString(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, TruffleString obj) { - switch (encoder.fastEncode) { - case FastEncode: - JSONUtils.appendStringUncached(obj, builder, false); - break; - case FastEncodeAscii: - JSONUtils.appendStringUncached(obj, builder, true); - break; - case None: - Object result = CallUnaryMethodNode.getUncached().executeObject(encoder.encoder, obj); - if (!isString(result)) { - throw PRaiseNode.raiseStatic(this, TypeError, ErrorMessages.ENCODER_MUST_RETURN_STR, result); + if (!skip) { + if (appendSimpleObjectNode.execute(frame, self, fastEncode, builder, value)) { + first = false; + // done + } else { + // startRecursion(self, value); + if (checkCircles) { + for (int i = 0; i < stack.size(); i++) { + if (stack.get(i).obj == value) { + errorProfile.enter(inliningTarget); + throw PRaiseNode.raiseStatic(this, ValueError, ErrorMessages.CIRCULAR_REFERENCE_DETECTED); + } + } + } + parent = value; + final Object stackIterator; + final Object stackStorage; + if (value instanceof PList || value instanceof PTuple) { + PSequence list = (PSequence) value; + appendCodePointNode.execute(builder, '['); + first = true; + if (pyTupleCheckExactNode.execute(inliningTarget, list) || pyListCheckExactNode.execute(inliningTarget, list)) { + state = STATE_BUILTIN_LIST; + builtinListStorage = list.getSequenceStorage(); + stackStorage = builtinListStorage; + stackIterator = null; + } else { + genericListProfile.enter(inliningTarget); + state = STATE_GENERIC_LIST; + genericIterator = callGetListIter.executeCached(frame, list); + stackStorage = null; + stackIterator = genericIterator; + } + } else if (value instanceof PDict dict) { + appendCodePointNode.execute(builder, '{'); + first = true; + if (!self.sortKeys && PGuards.isBuiltinDict(dict)) { + state = STATE_BUILTIN_DICT; + builtinDictStorage = dict.getDictStorage(); + builtinDictIterator = hashingStorageGetIterator.execute(inliningTarget, builtinDictStorage); + stackStorage = builtinDictStorage; + stackIterator = builtinDictIterator; + } else { + genericDictProfile.enter(inliningTarget); + state = STATE_GENERIC_DICT; + PList items = constructListNode.execute(frame, callGetItems.executeObject(frame, dict)); + if (self.sortKeys) { + sortList.execute(frame, items); + } + genericIterator = callGetDictIter.executeCached(frame, items); + stackStorage = null; + stackIterator = genericIterator; + } + } else { + state = STATE_DEFAULT_FN; + stackStorage = null; + stackIterator = null; + } + if (stack.size() > RECURSION_LIMIT) { + errorProfile.enter(inliningTarget); + throw JSONScannerBuiltins.recursionError(this, language); + } + stack.add(new StackEntry(state, parent, stackStorage, stackIterator, 0)); + } + } + while (true) { + switch (state) { + case STATE_INITIAL -> { + break outer; + } + case STATE_BUILTIN_LIST -> { + if (stack.peek().index < builtinListStorage.length()) { + value = getItemScalarNode.execute(inliningTarget, builtinListStorage, stack.peek().index++); + continue outer; + } + appendCodePointNode.execute(builder, ']'); + } + case STATE_BUILTIN_DICT -> { + if (hashingStorageIteratorNext.execute(inliningTarget, builtinDictStorage, builtinDictIterator)) { + key = hashingStorageIteratorKey.execute(inliningTarget, builtinDictStorage, builtinDictIterator); + value = hashingStorageIteratorValue.execute(inliningTarget, builtinDictStorage, builtinDictIterator); + continue outer; + } else { + appendCodePointNode.execute(builder, '}'); + } + } + case STATE_GENERIC_LIST, STATE_GENERIC_DICT -> { + try { + genericListProfile.enter(inliningTarget); + Object item = pyIterNextNode.execute(frame, inliningTarget, genericIterator); + if (state == STATE_GENERIC_DICT) { + genericDictProfile.enter(inliningTarget); + if (!(item instanceof PTuple itemTuple) || itemTuple.getSequenceStorage().length() != 2) { + errorProfile.enter(inliningTarget); + throw PRaiseNode.raiseStatic(this, ValueError, ErrorMessages.ITEMS_MUST_RETURN_2_TUPLES); + } + SequenceStorage sequenceStorage = itemTuple.getSequenceStorage(); + key = getItemScalarCustomNode.execute(inliningTarget, sequenceStorage, 0); + value = getItemScalarCustomNode.execute(inliningTarget, sequenceStorage, 1); + } else { + value = item; + } + } catch (IteratorExhausted e) { + appendCodePointNode.execute(builder, state == STATE_GENERIC_LIST ? ']' : '}'); + break; + } + continue outer; + } + case STATE_DEFAULT_FN -> { + if (stack.peek().index == 0) { + value = callDefaultFn.executeObject(self.defaultFn, parent); + stack.peek().index = 1; + continue outer; + } + } } - builder.appendStringUncached(CastToTruffleStringNode.executeUncached(result)); - break; - default: - assert false; - break; + first = false; + // current list or dict is exhausted, pop parent from stack + // endRecursion(self, parent); + stack.pop(); + if (stack.isEmpty()) { + break outer; + } + StackEntry top = stack.peek(); + state = top.state; + parent = top.obj; + switch (state) { + case STATE_BUILTIN_LIST -> { + builtinListStorage = (SequenceStorage) top.storage; + } + case STATE_BUILTIN_DICT -> { + builtinDictStorage = (HashingStorage) top.storage; + builtinDictIterator = (HashingStorageIterator) top.iterator; + } + case STATE_GENERIC_LIST, STATE_GENERIC_DICT -> { + genericIterator = top.iterator; + } + case STATE_DEFAULT_FN -> { + } + } + } } + return PFactory.createTuple(language, new Object[]{toStringNode.execute(builder)}); } - private static boolean isSimpleObj(Object obj) { - return obj == PNone.NONE || obj == Boolean.TRUE || obj == Boolean.FALSE || isString(obj) || isInteger(obj) || isPInt(obj) || obj instanceof Float || isDouble(obj) || isPFloat(obj); - } - - private boolean appendSimpleObj(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, Object obj) { - if (obj == PNone.NONE || obj == Boolean.TRUE || obj == Boolean.FALSE) { - appendConst(builder, obj); - } else if (isJavaString(obj)) { - appendString(encoder, builder, toTruffleStringUncached((String) obj)); - } else if (obj instanceof TruffleString) { - appendString(encoder, builder, (TruffleString) obj); - } else if (obj instanceof PString) { - appendString(encoder, builder, StringNodes.StringMaterializeNode.executeUncached((PString) obj)); - } else if (obj instanceof Integer) { - builder.appendIntNumberUncached((int) obj); - } else if (obj instanceof Long) { - builder.appendLongNumberUncached((long) obj); - } else if (obj instanceof PInt) { - builder.appendStringUncached(TruffleString.FromJavaStringNode.getUncached().execute(castExact(obj, PInt.class).toString(), TS_ENCODING)); - } else if (obj instanceof Float) { - appendFloat(encoder, builder, (float) obj); - } else if (obj instanceof Double) { - appendFloat(encoder, builder, (double) obj); - } else if (obj instanceof PFloat) { - appendFloat(encoder, builder, ((PFloat) obj).asDouble()); - } else { - return false; + private static final class StackEntry { + private final byte state; + private final Object obj; + private final Object storage; + private final Object iterator; + private int index; + + private StackEntry(byte state, Object obj, Object storage, Object iterator, int index) { + this.state = state; + this.obj = obj; + this.storage = storage; + this.iterator = iterator; + this.index = index; } - return true; } - private void appendListObj(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, Object obj) { - if (appendSimpleObj(encoder, builder, obj)) { - // done - } else if (obj instanceof PList || obj instanceof PTuple) { - appendList(encoder, builder, (PSequence) obj); - } else if (obj instanceof PDict) { - appendDict(encoder, builder, (PDict) obj); + private static boolean isSimpleObj(Object obj, + Node inliningTarget, + GetClassNode getClassNode, + IsSubtypeNode isSubtypeNode) { + if (obj == PNone.NONE || obj == Boolean.TRUE || obj == Boolean.FALSE || isString(obj) || isInteger(obj) || isPInt(obj) || obj instanceof Float || isDouble(obj) || isPFloat(obj)) { + return true; + } else if (CompilerDirectives.injectBranchProbability(UNLIKELY_PROBABILITY, obj instanceof PythonAbstractNativeObject)) { + return isNativeStringOrFloat(inliningTarget, getClassNode, isSubtypeNode, (PythonAbstractNativeObject) obj); } else { - startRecursion(encoder, obj); - Object newObj = CallUnaryMethodNode.getUncached().executeObject(encoder.defaultFn, obj); - appendListObj(encoder, builder, newObj); - endRecursion(encoder, obj); + return false; } } - private static void endRecursion(PJSONEncoder encoder, Object obj) { - if (encoder.markers != PNone.NONE) { - encoder.removeCircular(obj); - } + @InliningCutoff + private static boolean isNativeStringOrFloat(Node inliningTarget, GetClassNode getClassNode, IsSubtypeNode isSubtypeNode, PythonAbstractNativeObject nativeObj) { + Object pyClass = getClassNode.execute(inliningTarget, nativeObj); + return isSubtypeNode.execute(pyClass, PythonBuiltinClassType.PString) || isSubtypeNode.execute(pyClass, PythonBuiltinClassType.PFloat); } - private void startRecursion(PJSONEncoder encoder, Object obj) { - if (encoder.markers != PNone.NONE) { - if (!encoder.tryAddCircular(obj)) { - throw PRaiseNode.raiseStatic(this, ValueError, ErrorMessages.CIRCULAR_REFERENCE_DETECTED); + @GenerateInline(false) + abstract static class AppendSimpleObjectNode extends Node { + + abstract boolean execute(VirtualFrame frame, PJSONEncoder encoder, PJSONEncoder.FastEncode fastEncode, TruffleStringBuilderUTF32 builder, Object obj); + + @Specialization + static boolean appendSimpleObj(VirtualFrame frame, PJSONEncoder encoder, PJSONEncoder.FastEncode fastEncode, TruffleStringBuilderUTF32 builder, Object obj, + @Bind Node inliningTarget, + @Cached InlinedConditionProfile intProfile, + @Cached InlinedConditionProfile longProfile, + @Cached InlinedConditionProfile doubleProfile, + @Cached InlinedConditionProfile bigIntProfile, + @Cached InlinedConditionProfile numberStringProfile, + @Cached InlinedConditionProfile fastEncodeProfile, + @Cached InlinedBranchProfile customStringEncoderProfile, + @Cached InlinedBranchProfile errorProfile, + @Cached CallUnaryMethodNode customToStringCall, + @Cached GetClassNode getClassNode, + @Cached IsSubtypeNode isSubtypeNode, + @Cached CastToTruffleStringNode.ReadNativeStringNode readNativeStringNode, + @Cached CStructAccess.ReadDoubleNode readNativeDoubleNode, + @Cached CastToTruffleStringNode castToTruffleStringNode, + @Cached StringNodes.StringMaterializeNode stringMaterializeNode, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode1, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode2, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtNode, + @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + @Cached TruffleStringBuilder.AppendIntNumberNode appendIntNumberNode, + @Cached TruffleStringBuilder.AppendLongNumberNode appendLongNumberNode, + @Cached TruffleStringBuilder.AppendStringNode appendStringNode, + @Cached TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringNode, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { + final TruffleString constString = constToString(obj); + if (constString != null) { + appendStringNode.execute(builder, constString); + return true; } - } - } - - private void appendDict(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, PDict dict) { - HashingStorage storage = dict.getDictStorage(); - - if (HashingStorageLen.executeUncached(storage) == 0) { - builder.appendStringUncached(T_BRACES); - } else { - startRecursion(encoder, dict); - builder.appendCodePointUncached('{'); - - if (!encoder.sortKeys && PGuards.isBuiltinDict(dict)) { - HashingStorageIterator it = HashingStorageGetIterator.executeUncached(storage); - boolean first = true; - while (HashingStorageIteratorNext.executeUncached(storage, it)) { - Object key = HashingStorageIteratorKey.executeUncached(storage, it); - Object value = HashingStorageIteratorValue.executeUncached(storage, it); - first = appendDictEntry(encoder, builder, first, key, value); - } + final TruffleString string; + if (obj instanceof TruffleString tString) { + string = tString; + } else if (obj instanceof PString pString) { + string = stringMaterializeNode.execute(inliningTarget, pString); + } else if (obj instanceof PythonAbstractNativeObject nativeObj && isSubtypeNode.execute(getClassNode.execute(inliningTarget, nativeObj), PythonBuiltinClassType.PString)) { + string = readNativeStringNode.execute(nativeObj.getPtr()); } else { - appendDictSlowPath(encoder, builder, dict); + string = null; } - builder.appendCodePointUncached('}'); - endRecursion(encoder, dict); - } - } - - private void appendDictSlowPath(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, com.oracle.graal.python.builtins.objects.dict.PDict dict) { - PList items = ConstructListNode.getUncached().execute(null, callGetItems.executeObject(null, dict)); - if (encoder.sortKeys) { - sortList.execute(null, items); - } - Object iter = callGetDictIter.executeCached(null, items); - boolean first = true; - while (true) { - try { - Object item = PyIterNextNode.executeUncached(iter); - if (!(item instanceof PTuple itemTuple) || itemTuple.getSequenceStorage().length() != 2) { - throw PRaiseNode.raiseStatic(this, ValueError, ErrorMessages.ITEMS_MUST_RETURN_2_TUPLES); + if (string != null) { + if (fastEncode == PJSONEncoder.FastEncode.None) { + customStringEncoderProfile.enter(inliningTarget); + Object result = customToStringCall.executeObject(frame, encoder.encoder, string); + if (!isString(result)) { + errorProfile.enter(inliningTarget); + throw PRaiseNode.raiseStatic(inliningTarget, TypeError, ErrorMessages.ENCODER_MUST_RETURN_STR, result); + } + appendStringNode.execute(builder, castToTruffleStringNode.execute(inliningTarget, result)); + } else { + assert fastEncode == PJSONEncoder.FastEncode.FastEncode || fastEncode == PJSONEncoder.FastEncode.FastEncodeAscii; + JSONUtils.appendString(string, builder, fastEncodeProfile.profile(inliningTarget, fastEncode == PJSONEncoder.FastEncode.FastEncodeAscii), + byteIndexOfCodePointSetNode1, + byteIndexOfCodePointSetNode2, + codePointAtNode, + appendCodePointNode, + appendStringNode, + appendSubstringNode, + fromByteArrayNode); } - SequenceStorage sequenceStorage = itemTuple.getSequenceStorage(); - Object key = SequenceStorageNodes.GetItemScalarNode.executeUncached(sequenceStorage, 0); - Object value = SequenceStorageNodes.GetItemScalarNode.executeUncached(sequenceStorage, 1); - first = appendDictEntry(encoder, builder, first, key, value); - } catch (IteratorExhausted e) { - break; + return true; + } + if (intProfile.profile(inliningTarget, obj instanceof Integer)) { + appendIntNumberNode.execute(builder, (int) obj); + return true; + } + if (longProfile.profile(inliningTarget, obj instanceof Long)) { + appendLongNumberNode.execute(builder, (long) obj); + return true; + } + final double doubleValue; + final boolean isDouble; + if (obj instanceof Float) { + doubleValue = (float) obj; + isDouble = true; + } else if (obj instanceof Double) { + doubleValue = (double) obj; + isDouble = true; + } else if (obj instanceof PFloat) { + doubleValue = ((PFloat) obj).asDouble(); + isDouble = true; + } else if (obj instanceof PythonAbstractNativeObject nativeObj && isSubtypeNode.execute(getClassNode.execute(inliningTarget, nativeObj), PythonBuiltinClassType.PFloat)) { + doubleValue = readNativeDoubleNode.readFromObj(nativeObj, PyFloatObject__ob_fval); + isDouble = true; + } else { + doubleValue = 0; + isDouble = false; + } + final TruffleString numberString; + if (doubleProfile.profile(inliningTarget, isDouble)) { + numberString = floatToString(inliningTarget, encoder, doubleValue, errorProfile); + } else if (bigIntProfile.profile(inliningTarget, obj instanceof PInt)) { + numberString = fromJavaStringNode.execute(castExact(obj, PInt.class).toString(), TS_ENCODING); + } else { + numberString = null; } + if (numberStringProfile.profile(inliningTarget, numberString != null)) { + appendStringNode.execute(builder, numberString); + return true; + } + return false; } - } - private boolean appendDictEntry(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, boolean first, Object key, Object value) { - if (!first) { - builder.appendStringUncached(encoder.itemSeparator); - } - if (isString(key)) { - appendSimpleObj(encoder, builder, key); - } else { - if (!isSimpleObj(key)) { - if (encoder.skipKeys) { - return true; - } - throw PRaiseNode.raiseStatic(this, TypeError, ErrorMessages.KEYS_MUST_BE_STR_INT___NOT_P, key); + private static TruffleString constToString(Object obj) { + if (obj == PNone.NONE) { + return T_NULL; + } else if (obj == Boolean.TRUE) { + return T_TRUE; + } else if (obj == Boolean.FALSE) { + return T_FALSE; + } else { + return null; } - builder.appendCodePointUncached('"'); - appendSimpleObj(encoder, builder, key); - builder.appendCodePointUncached('"'); } - builder.appendStringUncached(encoder.keySeparator); - appendListObj(encoder, builder, value); - return false; - } - private void appendList(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, PSequence list) { - SequenceStorage storage = list.getSequenceStorage(); - - if (storage.length() == 0) { - builder.appendStringUncached(T_BRACKETS); - } else { - startRecursion(encoder, list); - builder.appendCodePointUncached('['); - - if (PyTupleCheckExactNode.executeUncached(list) || PyListCheckExactNode.executeUncached(list)) { - for (int i = 0; i < storage.length(); i++) { - if (i > 0) { - builder.appendStringUncached(encoder.itemSeparator); - } - appendListObj(encoder, builder, SequenceStorageNodes.GetItemScalarNode.executeUncached(storage, i)); + private static TruffleString floatToString(Node inliningTarget, PJSONEncoder encoder, double obj, InlinedBranchProfile errorProfile) { + if (!Double.isFinite(obj)) { + if (!encoder.allowNan) { + errorProfile.enter(inliningTarget); + throw PRaiseNode.raiseStatic(inliningTarget, ValueError, ErrorMessages.OUT_OF_RANGE_FLOAT_NOT_JSON_COMPLIANT, PyObjectReprAsTruffleStringNode.executeUncached(obj)); + } + if (obj > 0) { + return T_POSITIVE_INFINITY; + } else if (obj < 0) { + return T_NEGATIVE_INFINITY; + } else { + return T_NAN; } } else { - appendListSlowPath(encoder, builder, list); + return formatDouble(inliningTarget, obj); } - - builder.appendCodePointUncached(']'); - endRecursion(encoder, list); } - } - private void appendListSlowPath(PJSONEncoder encoder, TruffleStringBuilderUTF32 builder, PSequence list) { - Object iter = callGetListIter.executeCached(null, list); - boolean first = true; - while (true) { - try { - Object item = PyIterNextNode.executeUncached(iter); - if (!first) { - builder.appendStringUncached(encoder.itemSeparator); - } - first = false; - appendListObj(encoder, builder, item); - } catch (IteratorExhausted e) { - break; - } + @TruffleBoundary + private static TruffleString formatDouble(Node inliningTarget, double obj) { + FloatFormatter f = new FloatFormatter(FloatBuiltins.StrNode.spec, inliningTarget); + f.setMinFracDigits(1); + return FloatBuiltins.StrNode.doFormat(obj, f); } } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONModuleBuiltins.java index 34f265d5bd..0b6af411fc 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONModuleBuiltins.java @@ -17,7 +17,8 @@ import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.PythonBuiltins; import com.oracle.graal.python.builtins.modules.json.JSONScannerBuiltins.IntRef; -import com.oracle.graal.python.builtins.objects.str.StringNodes.CastToJavaStringCheckedNode; +import com.oracle.graal.python.builtins.objects.str.StringNodes; +import com.oracle.graal.python.builtins.objects.str.StringUtils; import com.oracle.graal.python.nodes.ErrorMessages; import com.oracle.graal.python.nodes.PRaiseNode; import com.oracle.graal.python.nodes.SpecialAttributeNames; @@ -25,16 +26,19 @@ import com.oracle.graal.python.nodes.function.builtins.PythonTernaryClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.PythonUnaryClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.clinic.ArgumentClinicProvider; +import com.oracle.graal.python.runtime.IndirectCallData; import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.GenerateNodeFactory; import com.oracle.truffle.api.dsl.NodeFactory; import com.oracle.truffle.api.dsl.Specialization; +import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; -import com.oracle.truffle.api.strings.TruffleStringIterator; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(defineModule = "_json") public final class JSONModuleBuiltins extends PythonBuiltins { @@ -52,7 +56,7 @@ public void initialize(Python3Core core) { } - static boolean isWhitespace(char c) { + static boolean isWhitespace(int c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } @@ -78,13 +82,33 @@ protected ArgumentClinicProvider getArgumentClinic() { } @Specialization - Object call(Object string, int end, boolean strict, + Object call(VirtualFrame frame, Object string, int end, boolean strict, + @Cached("createFor($node)") IndirectCallData.BoundaryCallData boundaryCallData, @Bind Node inliningTarget, - @Cached CastToJavaStringCheckedNode castString, - @Bind PythonLanguage language) { + @Cached StringNodes.CastToTruffleStringChecked1Node castString, + @Cached InlinedBranchProfile errorProfile, + @Bind PythonLanguage language, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode, + @Cached TruffleString.IntIndexOfAnyIntUTF32Node indexOfAnyIntUTF32Node, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + @Cached TruffleString.SubstringByteIndexNode substringByteIndexNode, + @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + @Cached TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, + @Cached TruffleStringBuilder.ToStringNode builderToStringNode) { IntRef nextIdx = new IntRef(); - TruffleString result = JSONScannerBuiltins.scanStringUnicode(castString.cast(inliningTarget, string, ErrorMessages.FIRST_ARG_MUST_BE_STRING_NOT_P, string), end, strict, nextIdx, - this); + TruffleString cast = castString.cast(inliningTarget, string, ErrorMessages.FIRST_ARG_MUST_BE_STRING_NOT_P, string); + TruffleString result = JSONScannerBuiltins.parseStringUnicode(frame, boundaryCallData, inliningTarget, cast, end, StringUtils.byteIndexToCodepointIndex(cast.byteLength(TS_ENCODING)), + strict, + nextIdx, + this, + errorProfile, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + codePointAtIndexNode, + substringByteIndexNode, + appendCodePointNode, + appendSubstringByteIndexNode, + builderToStringNode); return PFactory.createTuple(language, new Object[]{result, nextIdx.value}); } } @@ -105,18 +129,27 @@ protected ArgumentClinicProvider getArgumentClinic() { @Specialization static TruffleString call(TruffleString string, @Bind Node inliningTarget, - @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode, - @Cached TruffleStringIterator.NextNode nextNode, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode1, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode2, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtNode, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, - @Cached TruffleString.SubstringNode substringNode, + @Cached TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, @Cached TruffleStringBuilder.ToStringNode toStringNode, @Cached PRaiseNode raiseNode) { try { int len = string.byteLength(TS_ENCODING); // 12.5% overallocated, TruffleStringBuilder.ToStringNode will copy anyway - TruffleStringBuilder builder = TruffleStringBuilder.create(TS_ENCODING, len + (len >> 3) + 2); - JSONUtils.appendString(string, createCodePointIteratorNode.execute(string, TS_ENCODING), builder, false, nextNode, appendCodePointNode, appendStringNode, substringNode); + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(len + (len >> 3) + 2); + JSONUtils.appendString(string, builder, false, + byteIndexOfCodePointSetNode1, + byteIndexOfCodePointSetNode2, + codePointAtNode, + appendCodePointNode, + appendStringNode, + appendSubstringNode, + fromByteArrayNode); return toStringNode.execute(builder); } catch (OutOfMemoryError | NegativeArraySizeException e) { throw raiseNode.raise(inliningTarget, PythonBuiltinClassType.OverflowError, ErrorMessages.STR_TOO_LONG_TO_ESCAPE); @@ -141,19 +174,27 @@ protected ArgumentClinicProvider getArgumentClinic() { @Specialization static TruffleString call(TruffleString string, @Bind Node inliningTarget, - @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode, - @Cached TruffleStringIterator.NextNode nextNode, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode1, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode2, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtNode, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, - @Cached TruffleString.SubstringNode substringNode, + @Cached TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, @Cached TruffleStringBuilder.ToStringNode toStringNode, @Cached PRaiseNode raiseNode) { try { int len = string.byteLength(TS_ENCODING); // 12.5% overallocated, TruffleStringBuilder.ToStringNode will copy anyway - TruffleStringBuilder builder = TruffleStringBuilder.create(TS_ENCODING, len + (len >> 3) + 2); - JSONUtils.appendString(string, createCodePointIteratorNode.execute(string, TS_ENCODING), builder, true, - nextNode, appendCodePointNode, appendStringNode, substringNode); + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(len + (len >> 3) + 2); + JSONUtils.appendString(string, builder, true, + byteIndexOfCodePointSetNode1, + byteIndexOfCodePointSetNode2, + codePointAtNode, + appendCodePointNode, + appendStringNode, + appendSubstringNode, + fromByteArrayNode); return toStringNode.execute(builder); } catch (OutOfMemoryError | NegativeArraySizeException e) { throw raiseNode.raise(inliningTarget, PythonBuiltinClassType.OverflowError, ErrorMessages.STR_TOO_LONG_TO_ESCAPE); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java index 2e7e739249..b1829a6411 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONScannerBuiltins.java @@ -5,11 +5,16 @@ */ package com.oracle.graal.python.builtins.modules.json; +import static com.oracle.graal.python.builtins.PythonBuiltinClassType.RecursionError; +import static com.oracle.graal.python.builtins.objects.str.StringUtils.byteIndexToCodepointIndex; +import static com.oracle.graal.python.builtins.objects.str.StringUtils.codepointIndexToByteIndex; import static com.oracle.graal.python.nodes.StringLiterals.T_STRICT; +import static com.oracle.graal.python.util.PythonUtils.EMPTY_OBJECT_ARRAY; import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; +import java.util.HashMap; import java.util.List; import com.oracle.graal.python.PythonLanguage; @@ -26,6 +31,8 @@ import com.oracle.graal.python.builtins.objects.common.HashingStorageNodes.HashingStorageSetItem; import com.oracle.graal.python.builtins.objects.dict.PDict; import com.oracle.graal.python.builtins.objects.floats.FloatUtils; +import com.oracle.graal.python.builtins.objects.ints.PInt; +import com.oracle.graal.python.builtins.objects.list.PList; import com.oracle.graal.python.builtins.objects.tuple.PTuple; import com.oracle.graal.python.builtins.objects.type.TpSlots; import com.oracle.graal.python.builtins.objects.type.TypeNodes; @@ -44,25 +51,34 @@ import com.oracle.graal.python.nodes.function.builtins.PythonTernaryClinicBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.clinic.ArgumentClinicProvider; import com.oracle.graal.python.nodes.statement.AbstractImportNode; -import com.oracle.graal.python.runtime.ExecutionContext.BoundaryCallContext; +import com.oracle.graal.python.runtime.ExecutionContext; import com.oracle.graal.python.runtime.IndirectCallData.BoundaryCallData; import com.oracle.graal.python.runtime.PythonContext; +import com.oracle.graal.python.runtime.exception.PException; import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.graal.python.runtime.sequence.storage.ObjectSequenceStorage; +import com.oracle.graal.python.util.ArrayBuilder; import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; +import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.GenerateNodeFactory; import com.oracle.truffle.api.dsl.NodeFactory; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.profiles.InlinedBranchProfile; +import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.JSONScanner) public final class JSONScannerBuiltins extends PythonBuiltins { public static final TruffleString T_JSON_DECODE_ERROR = tsLiteral("JSONDecodeError"); + static final int RECURSION_LIMIT = 50_000; static final class IntRef { int value; @@ -89,15 +105,20 @@ public abstract static class MakeScanner extends PythonBinaryBuiltinNode { @Specialization public PJSONScanner doNew(VirtualFrame frame, Object cls, Object context, + @Bind Node inliningTarget, @Cached PyObjectIsTrueNode castStrict, - @Cached TypeNodes.GetInstanceShape getInstanceShape) { + @Cached TypeNodes.GetInstanceShape getInstanceShape, + @Cached PyFloatCheckExactNode pyFloatCheckExactNode, + @Cached PyLongCheckExactNode pyLongCheckExactNode) { boolean strict = castStrict.execute(frame, getStrict.execute(frame, context)); Object objectHook = getObjectHook.execute(frame, context); Object objectPairsHook = getObjectPairsHook.execute(frame, context); - Object parseFloat = getParseFloat.execute(frame, context); - Object parseInt = getParseInt.execute(frame, context); + Object parseFloatProp = getParseFloat.execute(frame, context); + Object parseIntProp = getParseInt.execute(frame, context); Object parseConstant = getParseConstant.execute(frame, context); + Object parseFloat = pyFloatCheckExactNode.execute(inliningTarget, parseFloatProp) ? PNone.NONE : parseFloatProp; + Object parseInt = pyLongCheckExactNode.execute(inliningTarget, parseIntProp) ? PNone.NONE : parseIntProp; return PFactory.createJSONScanner(cls, getInstanceShape.execute(cls), strict, objectHook, objectPairsHook, parseFloat, parseInt, parseConstant); } } @@ -123,180 +144,106 @@ protected ArgumentClinicProvider getArgumentClinic() { @Specialization protected PTuple call(VirtualFrame frame, PJSONScanner self, TruffleString string, int idx, + @Bind Node inliningTarget, + @Cached("createFor($node)") BoundaryCallData boundaryCallData, + @Cached InlinedConditionProfile defaultProfile, + @Cached InlinedBranchProfile errorProfile, + @Cached InlinedConditionProfile objectHookProfile, + @Cached InlinedConditionProfile objectPairsHookProfile, + @Cached InlinedConditionProfile parseFloatProfile, + @Cached InlinedConditionProfile parseIntProfile, + @Cached InlinedConditionProfile parseConstantProfile, + @Cached HashingStorageSetItem hashingStorageSetItem, + @Cached PyLongFromUnicodeObject pyLongFromUnicodeObject, + @Cached TruffleString.MaterializeNode materializeNode, + @Cached TruffleString.HashCodeNode hashCodeNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + @Cached TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode, + @Cached TruffleString.SubstringByteIndexNode substringByteIndexNode, + @Cached TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode, + @Cached TruffleString.IntIndexOfAnyIntUTF32Node indexOfAnyIntUTF32Node, @Cached TruffleString.ToJavaStringNode toJavaStringNode, - @Cached("createFor($node)") BoundaryCallData boundaryCallData) { + @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + @Cached TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, + @Cached TruffleStringBuilder.ToStringNode builderToStringNode) { IntRef nextIdx = new IntRef(); - Object result; - Object saved = BoundaryCallContext.enter(frame, boundaryCallData); - try { - result = scanOnceUnicode(self, toJavaStringNode.execute(string), idx, nextIdx); - } finally { - BoundaryCallContext.exit(frame, boundaryCallData, saved); - } - return PFactory.createTuple(PythonLanguage.get(this), new Object[]{result, nextIdx.value}); - } - - @TruffleBoundary - private Object parseObjectUnicode(PJSONScanner scanner, String string, int start, IntRef nextIdx) { - /* - * Read a JSON object from PyUnicode pystr. idx is the index of the first character - * after the opening curly brace. nextIdx is a return-by-reference index to the first - * character after the closing curly brace. - * - * Returns a new PyObject (usually a dict, but object_hook can change that) - */ - boolean hasPairsHook = scanner.objectPairsHook != PNone.NONE; - - PythonLanguage language = PythonLanguage.get(null); - - int idx = start; - int length = string.length(); - - ObjectSequenceStorage listStorage = null; - EconomicMapStorage mapStorage = null; - if (hasPairsHook) { - listStorage = new ObjectSequenceStorage(4); + boolean strict = self.strict; + Object objectHook = self.objectHook; + Object objectPairsHook = self.objectPairsHook; + Object parseFloat = self.parseFloat; + Object parseInt = self.parseInt; + Object parseConstant = self.parseConstant; + final Object result; + materializeNode.execute(string, TS_ENCODING); + if (defaultProfile.profile(inliningTarget, strict && + objectHook == PNone.NONE && + objectPairsHook == PNone.NONE && + parseFloat == PNone.NONE && + parseInt == PNone.NONE && + parseConstant == PNone.NONE)) { + result = scanOnceUnicode(frame, boundaryCallData, inliningTarget, string, idx, nextIdx, + self.memo, + true, + PNone.NONE, + PNone.NONE, + PNone.NONE, + PNone.NONE, + PNone.NONE, + errorProfile, + hashingStorageSetItem, + pyLongFromUnicodeObject, + hashCodeNode, + codePointAtIndexNode, + regionEqualByteIndexNode, + substringByteIndexNode, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + toJavaStringNode, + appendCodePointNode, + appendSubstringByteIndexNode, + builderToStringNode); } else { - mapStorage = EconomicMapStorage.create(); - } - - /* skip whitespace after { */ - idx = skipWhitespace(string, idx, length); - - /* only loop if the object is non-empty */ - if (idx >= length || string.charAt(idx) != '}') { - while (true) { - - /* read key */ - if (idx >= length || string.charAt(idx) != '"') { - throw decodeError(this, string, idx, ErrorMessages.EXPECTING_PROP_NAME_ECLOSED_IN_DBL_QUOTES); - } - TruffleString newKey = scanStringUnicode(string, idx + 1, scanner.strict, nextIdx, this); - TruffleString key = scanner.memo.putIfAbsent(newKey, newKey); - if (key == null) { - key = newKey; - } - idx = nextIdx.value; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - idx = skipWhitespace(string, idx, length); - if (idx >= length || string.charAt(idx) != ':') { - throw decodeError(this, string, idx, ErrorMessages.EXPECTING_COLON_DELIMITER); - } - idx = skipWhitespace(string, idx + 1, length); - - /* read any JSON term */ - Object val = scanOnceUnicode(scanner, string, idx, nextIdx); - idx = nextIdx.value; - - if (hasPairsHook) { - listStorage.insertItem(listStorage.length(), PFactory.createTuple(language, new Object[]{key, val})); - } else { - HashingStorage newStorage = HashingStorageSetItem.executeUncached(mapStorage, key, val); - assert newStorage == mapStorage; - } - - /* skip whitespace before } or , */ - idx = skipWhitespace(string, idx, length); - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx < length && string.charAt(idx) == '}') { - break; - } - if (idx >= length || string.charAt(idx) != ',') { - throw decodeError(this, string, idx, ErrorMessages.EXPECTING_COMMA_DELIMITER); - } - - /* skip whitespace after , delimiter */ - idx = skipWhitespace(string, idx + 1, length); - } - } - - nextIdx.value = idx + 1; - - if (hasPairsHook) { - return callObjectPairsHook.executeObject(scanner.objectPairsHook, PFactory.createList(language, listStorage)); + result = scanOnceUnicodeCutoff(frame, boundaryCallData, inliningTarget, string, idx, nextIdx, + self.memo, + strict, + objectHookProfile.profile(inliningTarget, objectHook == PNone.NONE) ? PNone.NONE : objectHook, + objectPairsHookProfile.profile(inliningTarget, objectPairsHook == PNone.NONE) ? PNone.NONE : objectPairsHook, + parseFloatProfile.profile(inliningTarget, parseFloat == PNone.NONE) ? PNone.NONE : parseFloat, + parseIntProfile.profile(inliningTarget, parseInt == PNone.NONE) ? PNone.NONE : parseInt, + parseConstantProfile.profile(inliningTarget, parseConstant == PNone.NONE) ? PNone.NONE : parseConstant, + errorProfile, + hashingStorageSetItem, + pyLongFromUnicodeObject, + hashCodeNode, + codePointAtIndexNode, + regionEqualByteIndexNode, + substringByteIndexNode, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + toJavaStringNode, + appendCodePointNode, + appendSubstringByteIndexNode, + builderToStringNode); } - - /* if object_hook is not None: rval = object_hook(rval) */ - PDict rval = PFactory.createDict(language, mapStorage); - if (scanner.objectHook != PNone.NONE) { - return callObjectHook.executeObject(scanner.objectHook, rval); - } - return rval; - } - - @TruffleBoundary - private Object parseArrayUnicode(PJSONScanner scanner, String string, int start, IntRef nextIdx) { - /* - * Read a JSON array from PyUnicode pystr. idx is the index of the first character after - * the opening brace. nextIdx is a return-by-reference index to the first character - * after the closing brace. - * - * Returns a new PyList - */ - int idx = start; - ObjectSequenceStorage storage = new ObjectSequenceStorage(4); - int length = string.length(); - - idx = skipWhitespace(string, idx, length); - - /* only loop if the array is non-empty */ - if (idx >= length || string.charAt(idx) != ']') { - while (true) { - - /* read any JSON term */ - Object val = scanOnceUnicode(scanner, string, idx, nextIdx); - storage.insertItem(storage.length(), val); - idx = nextIdx.value; - - /* skip whitespace between term and , */ - idx = skipWhitespace(string, idx, length); - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx < length && string.charAt(idx) == ']') { - break; - } - if (idx >= length || string.charAt(idx) != ',') { - throw decodeError(this, string, idx, ErrorMessages.EXPECTING_COMMA_DELIMITER); - } - idx++; - - idx = skipWhitespace(string, idx, length); - } - } - - /* verify that idx < (length-1), string.charAt( idx) should be ']' */ - if (idx >= length || string.charAt(idx) != ']') { - throw decodeError(this, string, length - 1, ErrorMessages.EXPECTING_VALUE); - } - nextIdx.value = idx + 1; - return PFactory.createList(PythonLanguage.get(null), storage); + return PFactory.createTuple(PythonLanguage.get(this), new Object[]{result, nextIdx.value}); } - private static int skipWhitespace(String string, int start, int length) { + private static int skipWhitespace(TruffleString string, int start, int length, TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { int idx = start; - while (idx < length && JSONModuleBuiltins.isWhitespace(string.charAt(idx))) { + while (idx < length && JSONModuleBuiltins.isWhitespace(codePointAtIndexNode.execute(string, idx))) { idx++; } return idx; } - private Object parseConstant(PJSONScanner scanner, String constant, int idx, IntRef nextIdx) { - /* - * Read a JSON constant. constant is the constant string that was found ("NaN", - * "Infinity", "-Infinity"). idx is the index of the first character of the constant - * nextIdx is a return-by-reference index to the first character after the constant. - * - * Returns the result of parse_constant - */ - - nextIdx.value = idx + constant.length(); - return callParseConstant.executeObject(scanner.parseConstant, toTruffleStringUncached(constant)); - } - - @TruffleBoundary - private Object matchNumberUnicode(PJSONScanner scanner, String string, int start, IntRef nextIdx) { + private Object matchNumberUnicode(Node inliningTarget, TruffleString string, int start, int length, IntRef nextIdx, + Object parseFloat, + Object parseInt, + InlinedBranchProfile errorProfile, + PyLongFromUnicodeObject pyLongFromUnicodeObject, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + TruffleString.SubstringByteIndexNode substringByteIndexNode, + TruffleString.ToJavaStringNode toJavaStringNode) { /* * Read a JSON number from PyUnicode pystr. idx is the index of the first character of * the number nextIdx is a return-by-reference index to the first character after the @@ -307,57 +254,64 @@ private Object matchNumberUnicode(PJSONScanner scanner, String string, int start */ int idx = start; - int length = string.length(); + boolean negative = codePointAtIndexNode.execute(string, idx) == '-'; /* read a sign if it's there, make sure it's not the end of the string */ - if (string.charAt(idx) == '-') { + if (negative) { idx++; if (idx >= length) { - throw stopIteration(this, start); + throw stopIteration(inliningTarget, errorProfile, this, start); } } /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (string.charAt(idx) >= '1' && string.charAt(idx) <= '9') { + int c = codePointAtIndexNode.execute(string, idx); + long longValue = 0; + if (isDecimalDigitWithoutZero(c)) { + longValue = c - '0'; idx++; - while (idx < length && string.charAt(idx) >= '0' && string.charAt(idx) <= '9') { + while (idx < length && isDecimalDigit(c = codePointAtIndexNode.execute(string, idx))) { + longValue = longValue * 10 + (c - '0'); idx++; } /* if it starts with 0 we only expect one integer digit */ - } else if (string.charAt(idx) == '0') { + } else if (c == '0') { idx++; /* no integer digits, error */ } else { - throw stopIteration(this, start); + throw stopIteration(inliningTarget, errorProfile, this, start); } boolean isFloat = false; /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < (length - 1) && string.charAt(idx) == '.' && string.charAt(idx + 1) >= '0' && string.charAt(idx + 1) <= '9') { + if (idx < (length - 1) && codePointAtIndexNode.execute(string, idx) == '.' && isDecimalDigit(codePointAtIndexNode.execute(string, idx + 1))) { isFloat = true; idx += 2; - while (idx < length && string.charAt(idx) >= '0' && string.charAt(idx) <= '9') { + while (idx < length && isDecimalDigit(codePointAtIndexNode.execute(string, idx))) { idx++; } } /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < (length - 1) && (string.charAt(idx) == 'e' || string.charAt(idx) == 'E')) { + if (idx < (length - 1) && ((codePointAtIndexNode.execute(string, idx) | 0x20) == 'e')) { int e_start = idx; idx++; /* read an exponent sign if present */ - if (idx < (length - 1) && (string.charAt(idx) == '-' || string.charAt(idx) == '+')) { + int plusMinus; + if (idx < (length - 1) && ((plusMinus = codePointAtIndexNode.execute(string, idx)) == '-' || plusMinus == '+')) { idx++; } /* read all digits */ - while (idx < length && string.charAt(idx) >= '0' && string.charAt(idx) <= '9') { + boolean gotDigits = false; + while (idx < length && isDecimalDigit(codePointAtIndexNode.execute(string, idx))) { idx++; + gotDigits = true; } /* if we got a digit, then parse as float. if not, backtrack */ - if (string.charAt(idx - 1) >= '0' && string.charAt(idx - 1) <= '9') { + if (gotDigits) { isFloat = true; } else { idx = e_start; @@ -365,29 +319,103 @@ private Object matchNumberUnicode(PJSONScanner scanner, String string, int start } nextIdx.value = idx; + TruffleString numStr = substringByteIndexNode.execute(string, codepointIndexToByteIndex(start), codepointIndexToByteIndex(idx - start), TS_ENCODING, true); if (isFloat) { - if (PyFloatCheckExactNode.executeUncached(scanner.parseFloat)) { - String numStr = string.substring(start, idx); - return FloatUtils.parseValidString(numStr); + if (parseFloat == PNone.NONE) { + return FloatUtils.parseValidString(toJavaStringNode.execute(numStr)); } else { /* copy the section we determined to be a number */ - TruffleString numStr = toTruffleStringUncached(string.substring(start, idx)); - return callParseFloat.executeObject(scanner.parseFloat, numStr); + return callParseFloat.executeObject(parseFloat, numStr); } } else { - if (PyLongCheckExactNode.executeUncached(scanner.parseInt)) { - TruffleString numStr = TruffleString.fromJavaStringUncached(string, start, idx - start, TS_ENCODING, false); - return PyLongFromUnicodeObject.executeUncached(numStr, 10); + if (parseInt == PNone.NONE) { + // long values with 18 digits or fewer cannot overflow. + if (idx - start <= 18) { + if (negative) { + longValue = -longValue; + } + return PInt.isIntRange(longValue) ? (int) longValue : longValue; + } + return parseLongGeneric(inliningTarget, numStr, pyLongFromUnicodeObject); } else { /* copy the section we determined to be a number */ - TruffleString numStr = toTruffleStringUncached(string.substring(start, idx)); - return callParseInt.executeObject(scanner.parseInt, numStr); + return callParseInt.executeObject(parseInt, numStr); } } } - @TruffleBoundary - private Object scanOnceUnicode(PJSONScanner scanner, String string, int idx, IntRef nextIdx) { + @InliningCutoff + private static Object parseLongGeneric(Node inliningTarget, TruffleString numStr, PyLongFromUnicodeObject pyLongFromUnicodeObject) { + return pyLongFromUnicodeObject.execute(inliningTarget, numStr, 10); + } + + private enum ScannerState { + initial, + list, + dict, + } + + @InliningCutoff + private Object scanOnceUnicodeCutoff(VirtualFrame frame, BoundaryCallData boundaryCallData, Node inliningTarget, TruffleString string, int idx, IntRef nextIdx, + HashMap memo, + boolean strict, + Object objectHook, + Object objectPairsHook, + Object parseFloat, + Object parseInt, + Object parseConstant, + InlinedBranchProfile errorProfile, + HashingStorageSetItem hashingStorageSetItem, + PyLongFromUnicodeObject pyLongFromUnicodeObject, + TruffleString.HashCodeNode hashCodeNode, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode, + TruffleString.SubstringByteIndexNode substringByteIndexNode, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode, TruffleString.IntIndexOfAnyIntUTF32Node indexOfAnyIntUTF32Node, + TruffleString.ToJavaStringNode toJavaStringNode, + TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, TruffleStringBuilder.ToStringNode builderToStringNode) { + return scanOnceUnicode(frame, boundaryCallData, inliningTarget, string, idx, nextIdx, + memo, + strict, + objectHook, + objectPairsHook, + parseFloat, + parseInt, + parseConstant, + errorProfile, + hashingStorageSetItem, pyLongFromUnicodeObject, hashCodeNode, + codePointAtIndexNode, + regionEqualByteIndexNode, + substringByteIndexNode, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + toJavaStringNode, + appendCodePointNode, + appendSubstringByteIndexNode, + builderToStringNode); + } + + private Object scanOnceUnicode(VirtualFrame frame, BoundaryCallData boundaryCallData, Node inliningTarget, TruffleString string, int idx, IntRef nextIdx, + HashMap memo, + boolean strict, + Object objectHook, + Object objectPairsHook, + Object parseFloat, + Object parseInt, + Object parseConstant, + InlinedBranchProfile errorProfile, + HashingStorageSetItem hashingStorageSetItem, + PyLongFromUnicodeObject pyLongFromUnicodeObject, + TruffleString.HashCodeNode hashCodeNode, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode, + TruffleString.SubstringByteIndexNode substringByteIndexNode, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode, TruffleString.IntIndexOfAnyIntUTF32Node indexOfAnyIntUTF32Node, + TruffleString.ToJavaStringNode toJavaStringNode, + TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, TruffleStringBuilder.ToStringNode builderToStringNode) { + ArrayBuilder stack = new ArrayBuilder<>(8); /* * Read one JSON term (of any kind) from PyUnicode pystr. idx is the index of the first * character of the term nextIdx is a return-by-reference index to the first character @@ -395,121 +423,444 @@ private Object scanOnceUnicode(PJSONScanner scanner, String string, int idx, Int * * Returns a new PyObject representation of the term. */ + int length = byteIndexToCodepointIndex(string.byteLength(TS_ENCODING)); if (idx < 0) { - throw PRaiseNode.raiseStatic(this, PythonBuiltinClassType.ValueError, ErrorMessages.IDX_CANNOT_BE_NEG); + throw raiseStatic(inliningTarget, errorProfile, this, PythonBuiltinClassType.ValueError, ErrorMessages.IDX_CANNOT_BE_NEG); } - int length = string.length(); if (idx >= length) { - throw stopIteration(this, idx); + throw stopIteration(inliningTarget, errorProfile, this, idx); + } + PythonLanguage language = PythonLanguage.get(null); + boolean hasPairsHook = objectPairsHook != PNone.NONE; + boolean hasObjectHook = objectHook != PNone.NONE; + boolean hasParseConstantHook = parseConstant != PNone.NONE; + ObjectSequenceStorage currentListStorage = null; + ObjectSequenceStorage currentPairsStorage = null; + EconomicMapStorage currentDictStorage = null; + ScannerState state = ScannerState.initial; + boolean commaSeen = false; + while (true) { + idx = skipWhitespace(string, idx, length, codePointAtIndexNode); + final TruffleString propertyKey; + final Object value; + ScannerState nextState = null; + if (state == ScannerState.dict) { + int c; + if (idx >= length || (c = codePointAtIndexNode.execute(string, idx)) != '"' && (c != '}')) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, idx, ErrorMessages.EXPECTING_PROP_NAME_ECLOSED_IN_DBL_QUOTES); + } else if (c == '}') { + if (commaSeen) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, idx, ErrorMessages.EXPECTING_PROP_NAME_ECLOSED_IN_DBL_QUOTES); + } + nextIdx.value = ++idx; + // pop current dict + Object topOfStack = stack.pop(); + TruffleString parentKey = null; + final Object dict; + if (hasPairsHook) { + if (topOfStack instanceof TruffleString) { + parentKey = (TruffleString) topOfStack; + topOfStack = stack.pop(); + } + assert topOfStack == currentPairsStorage; + dict = callObjectPairsHook.executeObject(objectPairsHook, PFactory.createList(language, currentPairsStorage)); + } else if (hasObjectHook) { + if (topOfStack instanceof TruffleString) { + parentKey = (TruffleString) topOfStack; + topOfStack = stack.pop(); + } + assert topOfStack instanceof PDict; + assert ((PDict) topOfStack).getDictStorage() == currentDictStorage; + dict = callObjectHook.executeObject(objectHook, topOfStack); + } else { + assert topOfStack instanceof PDict; + assert ((PDict) topOfStack).getDictStorage() == currentDictStorage; + dict = topOfStack; + } + if (stack.isEmpty()) { + return dict; + } else { + if (hasPairsHook || hasObjectHook) { + nextState = parentKey == null ? ScannerState.list : ScannerState.dict; + if (nextState == ScannerState.dict) { + Object parent = stack.peek(); + if (hasPairsHook) { + currentPairsStorage = (ObjectSequenceStorage) parent; + currentPairsStorage.setObjectItemNormalized(currentPairsStorage.length() - 1, PFactory.createTuple(language, new Object[]{parentKey, dict})); + } else { + PDict parentDict = (PDict) parent; + currentDictStorage = (EconomicMapStorage) parentDict.getDictStorage(); + HashingStorage setItemReturnVal = hashingStorageSetItem.execute(inliningTarget, currentDictStorage, parentKey, dict); + assert currentDictStorage == setItemReturnVal; + } + } else { + currentListStorage = (ObjectSequenceStorage) ((PList) stack.peek()).getSequenceStorage(); + currentListStorage.setObjectItemNormalized(currentListStorage.length() - 1, dict); + } + } else { + Object parent = stack.peek(); + if (parent instanceof PDict parentDict) { + currentDictStorage = (EconomicMapStorage) parentDict.getDictStorage(); + nextState = ScannerState.dict; + } else { + currentListStorage = (ObjectSequenceStorage) ((PList) parent).getSequenceStorage(); + nextState = ScannerState.list; + } + } + } + propertyKey = null; + } else { + /* read key */ + TruffleString newKey = parseStringUnicode(frame, boundaryCallData, inliningTarget, string, idx + 1, length, strict, nextIdx, this, errorProfile, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + codePointAtIndexNode, + substringByteIndexNode, + appendCodePointNode, + appendSubstringByteIndexNode, builderToStringNode); + hashCodeNode.execute(newKey, TS_ENCODING); + TruffleString key = memoPutIfAbsent(memo, newKey); + if (key == null) { + key = newKey; + } + propertyKey = key; + idx = nextIdx.value; + /* + * skip whitespace between key and : delimiter, read :, skip whitespace + */ + idx = skipWhitespace(string, idx, length, codePointAtIndexNode); + if (idx >= length || codePointAtIndexNode.execute(string, idx) != ':') { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, idx, ErrorMessages.EXPECTING_COLON_DELIMITER); + } + idx = skipWhitespace(string, idx + 1, length, codePointAtIndexNode); + } + } else if (state == ScannerState.list) { + if (idx >= length) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, length, ErrorMessages.EXPECTING_VALUE); + } + if (codePointAtIndexNode.execute(string, idx) == ']') { + if (commaSeen) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, idx, ErrorMessages.EXPECTING_VALUE); + } + nextIdx.value = ++idx; + Object topOfStack = stack.pop(); + assert topOfStack instanceof PList; + assert ((PList) topOfStack).getSequenceStorage() == currentListStorage; + if (stack.isEmpty()) { + return topOfStack; + } else { + Object parent = stack.peek(); + if (parent instanceof PList parentList) { + currentListStorage = (ObjectSequenceStorage) parentList.getSequenceStorage(); + nextState = ScannerState.list; + } else if (hasPairsHook) { + currentPairsStorage = (ObjectSequenceStorage) parent; + nextState = ScannerState.dict; + } else { + currentDictStorage = (EconomicMapStorage) ((PDict) parent).getDictStorage(); + nextState = ScannerState.dict; + } + } + } + propertyKey = null; + } else { + propertyKey = null; + } + if (nextState == null) { + if (idx >= length) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, length, ErrorMessages.EXPECTING_VALUE); + } + int c = codePointAtIndexNode.execute(string, idx); + if (c == '{') { + idx++; + if (hasPairsHook) { + value = new ObjectSequenceStorage(4); + } else { + value = PFactory.createDict(language, EconomicMapStorage.create()); + } + nextState = ScannerState.dict; + } else if (c == '[') { + idx++; + value = PFactory.createList(language, new ObjectSequenceStorage(4)); + nextState = ScannerState.list; + } else { + value = parsePrimitiveUnicode(frame, boundaryCallData, inliningTarget, hasParseConstantHook, string, idx, length, nextIdx, c, strict, parseConstant, parseFloat, parseInt, + errorProfile, + pyLongFromUnicodeObject, + codePointAtIndexNode, + regionEqualByteIndexNode, + substringByteIndexNode, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + appendCodePointNode, + appendSubstringByteIndexNode, + toJavaStringNode, + builderToStringNode); + idx = nextIdx.value; + } + if (state == ScannerState.dict) { + assert propertyKey != null; + if (hasPairsHook) { + currentPairsStorage.appendItem(PFactory.createTuple(language, new Object[]{propertyKey, value})); + } else { + HashingStorage newStorage = hashingStorageSetItem.execute(inliningTarget, currentDictStorage, propertyKey, value); + assert newStorage == currentDictStorage; + } + } else if (state == ScannerState.list) { + assert propertyKey == null; + currentListStorage.appendItem(value); + } else if (nextState == null) { + assert stack.isEmpty(); + return value; + } + if (nextState != null) { + stack.add(value); + if (stack.size() > RECURSION_LIMIT) { + throw recursionError(inliningTarget, errorProfile, this, language); + } + if (nextState == ScannerState.list) { + currentListStorage = (ObjectSequenceStorage) ((PList) value).getSequenceStorage(); + } else if (hasPairsHook) { + assert nextState == ScannerState.dict; + currentPairsStorage = (ObjectSequenceStorage) value; + if (state == ScannerState.dict) { + stack.add(propertyKey); + } + } else { + assert nextState == ScannerState.dict; + currentDictStorage = (EconomicMapStorage) ((PDict) value).getDictStorage(); + if (hasObjectHook && state == ScannerState.dict) { + stack.add(propertyKey); + } + } + state = nextState; + commaSeen = false; + continue; + } + } else { + state = nextState; + } + idx = skipWhitespace(string, idx, length, codePointAtIndexNode); + int c = idx < length ? codePointAtIndexNode.execute(string, idx) : 0; + if (c == (state == ScannerState.dict ? '}' : ']')) { + commaSeen = false; + continue; + } + if (c != ',') { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, this, string, idx, ErrorMessages.EXPECTING_COMMA_DELIMITER); + } + commaSeen = true; + idx++; } + } - switch (string.charAt(idx)) { + @TruffleBoundary + private static TruffleString memoPutIfAbsent(HashMap memo, TruffleString newKey) { + return memo.putIfAbsent(newKey, newKey); + } + + private static final TruffleString[] DOUBLE_CONSTANTS = { + tsLiteral("NaN"), + tsLiteral("Infinity"), + tsLiteral("-Infinity"), + }; + + private static final TruffleString ULL = tsLiteral("ull"); + private static final TruffleString RUE = tsLiteral("rue"); + private static final TruffleString ALSE = tsLiteral("alse"); + private static final TruffleString AN = tsLiteral("aN"); + private static final TruffleString NFINITY = tsLiteral("nfinity"); + private static final TruffleString INFINITY = tsLiteral("Infinity"); + + private Object parsePrimitiveUnicode(VirtualFrame frame, BoundaryCallData boundaryCallData, Node inliningTarget, boolean hasParseConstantHook, TruffleString string, int idx, + int length, IntRef nextIdx, int c, + boolean strict, + Object parseConstant, + Object parseFloat, + Object parseInt, + InlinedBranchProfile errorProfile, + PyLongFromUnicodeObject pyLongFromUnicodeObject, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode, + TruffleString.SubstringByteIndexNode substringByteIndexNode, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode, + TruffleString.IntIndexOfAnyIntUTF32Node indexOfAnyIntUTF32Node, + TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, + TruffleString.ToJavaStringNode toJavaStringNode, + TruffleStringBuilder.ToStringNode builderToStringNode) { + int doubleConstant = -1; + switch (c) { case '"': /* string */ - return scanStringUnicode(string, idx + 1, scanner.strict, nextIdx, this); - case '{': - /* object */ - return parseObjectUnicode(scanner, string, idx + 1, nextIdx); - case '[': - /* array */ - return parseArrayUnicode(scanner, string, idx + 1, nextIdx); + return parseStringUnicode(frame, boundaryCallData, inliningTarget, string, idx + 1, length, strict, nextIdx, this, + errorProfile, + byteIndexOfCodePointSetNode, + indexOfAnyIntUTF32Node, + codePointAtIndexNode, + substringByteIndexNode, + appendCodePointNode, + appendSubstringByteIndexNode, + builderToStringNode); case 'n': /* null */ - if ((idx + 3 < length) && string.charAt(idx + 1) == 'u' && string.charAt(idx + 2) == 'l' && string.charAt(idx + 3) == 'l') { + if (regionEquals(string, idx, ULL, regionEqualByteIndexNode)) { nextIdx.value = idx + 4; return PNone.NONE; } break; case 't': /* true */ - if ((idx + 3 < length) && string.charAt(idx + 1) == 'r' && string.charAt(idx + 2) == 'u' && string.charAt(idx + 3) == 'e') { + if (regionEquals(string, idx, RUE, regionEqualByteIndexNode)) { nextIdx.value = idx + 4; return true; } break; case 'f': /* false */ - if ((idx + 4 < length) && string.charAt(idx + 1) == 'a' && string.charAt(idx + 2) == 'l' && string.charAt(idx + 3) == 's' && string.charAt(idx + 4) == 'e') { + if (regionEquals(string, idx, ALSE, regionEqualByteIndexNode)) { nextIdx.value = idx + 5; return false; } break; case 'N': /* NaN */ - if ((idx + 2 < length) && string.charAt(idx + 1) == 'a' && string.charAt(idx + 2) == 'N') { - return parseConstant(scanner, "NaN", idx, nextIdx); + if (regionEquals(string, idx, AN, regionEqualByteIndexNode)) { + nextIdx.value = idx + 3; + if (hasParseConstantHook) { + doubleConstant = 0; + } else { + return Double.NaN; + } } break; case 'I': /* Infinity */ - if ((idx + 7 < length) && string.charAt(idx + 1) == 'n' && - string.charAt(idx + 2) == 'f' && - string.charAt(idx + 3) == 'i' && - string.charAt(idx + 4) == 'n' && - string.charAt(idx + 5) == 'i' && - string.charAt(idx + 6) == 't' && - string.charAt(idx + 7) == 'y') { - return parseConstant(scanner, "Infinity", idx, nextIdx); + if (regionEquals(string, idx, NFINITY, regionEqualByteIndexNode)) { + nextIdx.value = idx + 8; + if (hasParseConstantHook) { + doubleConstant = 1; + } else { + return Double.POSITIVE_INFINITY; + } } break; case '-': /* -Infinity */ - if ((idx + 8 < length) && string.charAt(idx + 1) == 'I' && - string.charAt(idx + 2) == 'n' && - string.charAt(idx + 3) == 'f' && - string.charAt(idx + 4) == 'i' && - string.charAt(idx + 5) == 'n' && - string.charAt(idx + 6) == 'i' && - string.charAt(idx + 7) == 't' && - string.charAt(idx + 8) == 'y') { - return parseConstant(scanner, "-Infinity", idx, nextIdx); + if (regionEquals(string, idx, INFINITY, regionEqualByteIndexNode)) { + nextIdx.value = idx + 9; + if (hasParseConstantHook) { + doubleConstant = 2; + } else { + return Double.NEGATIVE_INFINITY; + } } break; } + if (doubleConstant >= 0) { + /* + * Read a JSON constant. constant is the constant string that was found ("NaN", + * "Infinity", "-Infinity"). + * + * Returns the result of parse_constant + */ + return callParseConstant.executeObject(parseConstant, DOUBLE_CONSTANTS[doubleConstant]); + } /* Didn't find a string, object, array, or named constant. Look for a number. */ - return matchNumberUnicode(scanner, string, idx, nextIdx); + return matchNumberUnicode(inliningTarget, string, idx, length, nextIdx, parseFloat, parseInt, errorProfile, + pyLongFromUnicodeObject, + codePointAtIndexNode, + substringByteIndexNode, + toJavaStringNode); } + private static boolean regionEquals(TruffleString a, int idx, TruffleString b, TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode) { + int fromByteIndexB = codepointIndexToByteIndex(idx + 1); + int lengthB = b.byteLength(TS_ENCODING); + return fromByteIndexB + lengthB <= a.byteLength(TS_ENCODING) && regionEqualByteIndexNode.execute(a, fromByteIndexB, b, 0, lengthB, TS_ENCODING); + } } - @TruffleBoundary - static TruffleString scanStringUnicode(String string, int start, boolean strict, IntRef nextIdx, Node raisingNode) { - String result; - StringBuilder builder = null; - - if (start < 0 || start > string.length()) { - throw PRaiseNode.raiseStatic(raisingNode, PythonBuiltinClassType.ValueError, ErrorMessages.END_IS_OUT_OF_BOUNDS); + private static final TruffleString.CodePointSet CODE_POINT_SET_STRICT = TruffleString.CodePointSet.fromRanges(new int[]{ + 0, 0x1f, + '"', '"', + '\\', '\\', + }, TS_ENCODING); + private static final int[] CODE_POINT_SET_NON_STRICT = new int[]{'"', '\\'}; + + static TruffleString parseStringUnicode(VirtualFrame frame, BoundaryCallData boundaryCallData, Node inliningTarget, TruffleString string, int start, int length, boolean strict, + IntRef nextIdx, Node raisingNode, + InlinedBranchProfile errorProfile, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode, + TruffleString.IntIndexOfAnyIntUTF32Node indexOfAnyIntUTF32Node, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + TruffleString.SubstringByteIndexNode substringByteIndexNode, + TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, + TruffleStringBuilder.ToStringNode builderToStringNode) { + if (start < 0 || start > length) { + throw raiseStatic(inliningTarget, errorProfile, raisingNode, PythonBuiltinClassType.ValueError, ErrorMessages.END_IS_OUT_OF_BOUNDS); } int idx = start; - while (idx < string.length()) { - char c = string.charAt(idx++); + if (start < length) { + if (strict) { + idx = byteIndexToCodepointIndex(byteIndexOfCodePointSetNode.execute(string, codepointIndexToByteIndex(start), codepointIndexToByteIndex(length), CODE_POINT_SET_STRICT)); + } else { + idx = indexOfAnyIntUTF32Node.execute(string, start, length, CODE_POINT_SET_NON_STRICT); + } + if (idx < 0) { + idx = length; + } else if (codePointAtIndexNode.execute(string, idx) == '"') { + nextIdx.value = idx + 1; + return substringByteIndexNode.execute(string, codepointIndexToByteIndex(start), codepointIndexToByteIndex(idx - start), TS_ENCODING, false); + } + } + return parseStringUnicodeSlowpath(frame, boundaryCallData, inliningTarget, string, start, length, strict, idx, nextIdx, raisingNode, errorProfile, + codePointAtIndexNode, + appendCodePointNode, + appendSubstringByteIndexNode, + builderToStringNode); + } + + @InliningCutoff + private static TruffleString parseStringUnicodeSlowpath(VirtualFrame frame, BoundaryCallData boundaryCallData, Node inliningTarget, TruffleString string, int start, int length, + boolean strict, int idx, IntRef nextIdx, Node raisingNode, + InlinedBranchProfile errorProfile, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, + TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringByteIndexNode, + TruffleStringBuilder.ToStringNode builderToStringNode) { + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(); + appendSubstringByteIndexNode.execute(builder, string, codepointIndexToByteIndex(start), codepointIndexToByteIndex(idx - start)); + char highSurrogate = 0; + while (idx < length) { + int c = codePointAtIndexNode.execute(string, idx++); if (c == '"') { // we reached the end of the string literal - result = builder == null ? string.substring(start, idx - 1) : builder.toString(); nextIdx.value = idx; - return toTruffleStringUncached(result); + return builderToStringNode.execute(builder); } else if (c == '\\') { // escape sequence, switch to StringBuilder - if (builder == null) { - builder = new StringBuilder().append(string, start, idx - 1); - } - if (idx >= string.length()) { - throw decodeError(raisingNode, string, start - 1, ErrorMessages.UTERMINATED_STR_STARTING); + if (idx >= length) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, raisingNode, string, start - 1, ErrorMessages.UTERMINATED_STR_STARTING); } - c = string.charAt(idx++); + c = codePointAtIndexNode.execute(string, idx++); if (c == 'u') { - if (idx + 3 >= string.length()) { - throw decodeError(raisingNode, string, idx - 1, ErrorMessages.INVALID_UXXXX_ESCAPE); + if (idx + 3 >= length) { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, raisingNode, string, idx - 1, ErrorMessages.INVALID_UXXXX_ESCAPE); } c = 0; for (int i = 0; i < 4; i++) { - char d = string.charAt(idx++); - int digit = switch (d) { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> d - '0'; - case 'a', 'b', 'c', 'd', 'e', 'f' -> d - 'a' + 10; - case 'A', 'B', 'C', 'D', 'E', 'F' -> d - 'A' + 10; - default -> throw decodeError(raisingNode, string, idx - 1, ErrorMessages.INVALID_UXXXX_ESCAPE); - }; + int d = codePointAtIndexNode.execute(string, idx++); + final int digit; + final int dLowerCase; + if (isDecimalDigit(d)) { + digit = d - '0'; + } else if ('a' <= (dLowerCase = (d | 0x20)) && dLowerCase <= 'f') { + digit = dLowerCase - ('a' - 10); + } else { + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, raisingNode, string, idx - 1, ErrorMessages.INVALID_UXXXX_ESCAPE); + } c = (char) ((c << 4) + digit); } } else { @@ -534,31 +885,91 @@ static TruffleString scanStringUnicode(String string, int start, boolean strict, c = '\t'; break; default: - throw decodeError(raisingNode, string, idx - 1, ErrorMessages.INVALID_ESCAPE); + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, raisingNode, string, idx - 1, ErrorMessages.INVALID_ESCAPE); } } - builder.append(c); + if (isLowSurrogate(c) && highSurrogate != 0) { + c = Character.toCodePoint(highSurrogate, (char) c); + } + if (isHighSurrogate(c)) { + highSurrogate = (char) c; + } else { + appendCodePointNode.execute(builder, c, 1, true); + highSurrogate = 0; + } } else { // any other character: check if in strict mode if (strict && c < 0x20) { - throw decodeError(raisingNode, string, idx - 1, ErrorMessages.INVALID_CTRL_CHARACTER_AT); - } - if (builder != null) { - builder.append(c); + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, raisingNode, string, idx - 1, ErrorMessages.INVALID_CTRL_CHARACTER_AT); } + appendCodePointNode.execute(builder, c, 1, true); } } - throw decodeError(raisingNode, string, start - 1, ErrorMessages.UNTERMINATED_STR_STARTING_AT); + throw decodeError(frame, boundaryCallData, inliningTarget, errorProfile, raisingNode, string, start - 1, ErrorMessages.UNTERMINATED_STR_STARTING_AT); + } + + private static boolean isHighSurrogate(int ch) { + return ch >= Character.MIN_HIGH_SURROGATE && ch < (Character.MAX_HIGH_SURROGATE + 1); + } + + private static boolean isLowSurrogate(int ch) { + return ch >= Character.MIN_LOW_SURROGATE && ch < (Character.MAX_LOW_SURROGATE + 1); + } + + private static boolean isDecimalDigit(int c) { + return '0' <= c && c <= '9'; } - private static RuntimeException decodeError(Node raisingNode, String jsonString, int pos, TruffleString format) { + private static boolean isDecimalDigitWithoutZero(int c) { + return '1' <= c && c <= '9'; + } + + @InliningCutoff + private static PException raiseStatic(Node inliningTarget, InlinedBranchProfile errorProfile, Node raisingNode, PythonBuiltinClassType type, TruffleString message) { + errorProfile.enter(inliningTarget); + throw PRaiseNode.raiseStatic(raisingNode, type, message); + } + + @InliningCutoff + static RuntimeException recursionError(Node inliningTarget, InlinedBranchProfile errorProfile, Node raisingNode, PythonLanguage language) { + errorProfile.enter(inliningTarget); + throw recursionError(raisingNode, language); + } + + @TruffleBoundary + static RuntimeException recursionError(Node raisingNode, PythonLanguage language) { + CompilerAsserts.neverPartOfCompilation(); + throw PRaiseNode.raiseExceptionObjectStatic(raisingNode, PFactory.createBaseException(language, RecursionError, ErrorMessages.MAXIMUM_RECURSION_DEPTH_EXCEEDED, EMPTY_OBJECT_ARRAY), false); + } + + @InliningCutoff + private static RuntimeException decodeError(VirtualFrame frame, BoundaryCallData boundaryCallData, Node inliningTarget, InlinedBranchProfile errorProfile, Node raisingNode, + TruffleString jsonString, int pos, TruffleString format) { + errorProfile.enter(inliningTarget); + Object saved = ExecutionContext.BoundaryCallContext.enter(frame, boundaryCallData); + try { + throw decodeError(raisingNode, jsonString, pos, format); + } finally { + ExecutionContext.BoundaryCallContext.exit(frame, boundaryCallData, saved); + } + } + + @TruffleBoundary + private static RuntimeException decodeError(Node raisingNode, TruffleString jsonString, int pos, TruffleString format) { CompilerAsserts.neverPartOfCompilation(); Object module = AbstractImportNode.importModule(toTruffleStringUncached("json.decoder")); Object errorClass = PyObjectLookupAttr.executeUncached(module, T_JSON_DECODE_ERROR); - Object exception = CallNode.executeUncached(errorClass, format, toTruffleStringUncached(jsonString), pos); + Object exception = CallNode.executeUncached(errorClass, format, jsonString, pos); throw PRaiseNode.raiseExceptionObjectStatic(raisingNode, exception, false); } + @InliningCutoff + private static RuntimeException stopIteration(Node inliningTarget, InlinedBranchProfile errorProfile, Node raisingNode, Object value) { + errorProfile.enter(inliningTarget); + throw stopIteration(raisingNode, value); + } + + @TruffleBoundary private static RuntimeException stopIteration(Node raisingNode, Object value) { CompilerAsserts.neverPartOfCompilation(); Object exception = CallNode.executeUncached(PythonContext.get(raisingNode).lookupType(PythonBuiltinClassType.StopIteration), value); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONUtils.java index 2c83773e99..0893e33b9a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/JSONUtils.java @@ -44,12 +44,11 @@ import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; +import com.oracle.graal.python.builtins.objects.str.StringUtils; +import com.oracle.truffle.api.HostCompilerDirectives.InliningRoot; import com.oracle.truffle.api.strings.TruffleString; -import com.oracle.truffle.api.strings.TruffleString.SubstringNode; import com.oracle.truffle.api.strings.TruffleStringBuilder; -import com.oracle.truffle.api.strings.TruffleStringBuilder.AppendStringNode; import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; -import com.oracle.truffle.api.strings.TruffleStringIterator; public abstract class JSONUtils { private JSONUtils() { @@ -63,134 +62,103 @@ private JSONUtils() { private static final TruffleString T_ESC_R = tsLiteral("\\r"); private static final TruffleString T_ESC_T = tsLiteral("\\t"); - static void appendStringUncached(TruffleString ts, TruffleStringBuilderUTF32 builder, boolean asciiOnly) { - // Note: appending in chunks does not pay off in the uncached case - builder.appendCodePointUncached('"'); - TruffleStringIterator it = ts.createCodePointIteratorUncached(TS_ENCODING); - while (it.hasNext()) { - int c = it.nextUncached(TS_ENCODING); - switch (c) { - case '\\': - builder.appendStringUncached(T_ESC_BACKSLASH); - break; - case '"': - builder.appendStringUncached(T_ESC_QUOTE); - break; - case '\b': - builder.appendStringUncached(T_ESC_B); - break; - case '\f': - builder.appendStringUncached(T_ESC_F); - break; - case '\n': - builder.appendStringUncached(T_ESC_N); - break; - case '\r': - builder.appendStringUncached(T_ESC_R); - break; - case '\t': - builder.appendStringUncached(T_ESC_T); - break; - default: - if (c <= 0x1f || (asciiOnly && c > '~')) { - // appendSubstringUncached(builder, ts, chunkStart, currentIndex); - if (c <= 0xffff) { - appendEscapedUtf16Uncached((char) c, builder); - } else { - // split SMP codepoint to surrogate pair - appendEscapedUtf16Uncached((char) (0xD800 + ((c - 0x10000) >> 10)), builder); - appendEscapedUtf16Uncached((char) (0xDC00 + ((c - 0x10000) & 0x3FF)), builder); - } - } else { - builder.appendCodePointUncached(c, 1, true); - } - break; + private static final TruffleString.CodePointSet JSON_ESCAPE_CHARS = TruffleString.CodePointSet.fromRanges(new int[]{ + 0, 0x1f, // includes \b, \f, \n, \r, \t + '"', '"', + '\\', '\\', + }, TS_ENCODING); + private static final TruffleString.CodePointSet JSON_ESCAPE_CHARS_ASCII_ONLY = TruffleString.CodePointSet.fromRanges(new int[]{ + 0, 0x1f, // includes \b, \f, \n, \r, \t + '"', '"', + '\\', '\\', + 0x7f, 0x10ffff, + }, TS_ENCODING); + + static void appendString(TruffleString ts, TruffleStringBuilderUTF32 builder, boolean asciiOnly, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode1, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode2, + TruffleString.CodePointAtIndexUTF32Node codePointAtNode, + TruffleStringBuilder.AppendCodePointNode appendCodePointNode, + TruffleStringBuilder.AppendStringNode appendStringNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringNode, + TruffleString.FromByteArrayNode fromByteArrayNode) { + appendCodePointNode.execute(builder, '"'); + int byteLength = ts.byteLength(TS_ENCODING); + int codepointLength = StringUtils.byteIndexToCodepointIndex(byteLength); + if (codepointLength < 16) { + int i = 0; + for (; i < codepointLength; i++) { + int c = codePointAtNode.execute(ts, i); + if (c <= 0x1f || c == '"' || c == '\\' || (asciiOnly && c >= 0x7f)) { + break; + } + } + if (i > 0) { + appendSubstringNode.execute(builder, ts, 0, StringUtils.codepointIndexToByteIndex(i)); } + for (; i < codepointLength; i++) { + int c = codePointAtNode.execute(ts, i); + if (c <= 0x1f || c == '"' || c == '\\' || (asciiOnly && c >= 0x7f)) { + appendStringNode.execute(builder, getEscaped(c, fromByteArrayNode)); + } else { + appendCodePointNode.execute(builder, c); + } + } + } else { + appendLongString(ts, builder, asciiOnly, byteIndexOfCodePointSetNode1, byteIndexOfCodePointSetNode2, codePointAtNode, appendStringNode, appendSubstringNode, fromByteArrayNode, byteLength); } - builder.appendCodePointUncached('"'); - } - - private static void appendEscapedUtf16Uncached(char c, TruffleStringBuilderUTF32 builder) { - builder.appendStringUncached(TruffleString.fromByteArrayUncached( - new byte[]{'\\', 'u', HEXDIGITS[(c >> 12) & 0xf], HEXDIGITS[(c >> 8) & 0xf], HEXDIGITS[(c >> 4) & 0xf], HEXDIGITS[c & 0xf]}, TruffleString.Encoding.US_ASCII)); + appendCodePointNode.execute(builder, '"'); } - static void appendString(TruffleString s, TruffleStringIterator it, TruffleStringBuilder builder, boolean asciiOnly, TruffleStringIterator.NextNode nextNode, - TruffleStringBuilder.AppendCodePointNode appendCodePointNode, TruffleStringBuilder.AppendStringNode appendStringNode, SubstringNode substringNode) { - appendCodePointNode.execute(builder, '"', 1, true); - - int chunkStart = 0; - int currentIndex = 0; - while (it.hasNext()) { - int c = nextNode.execute(it, TS_ENCODING); - switch (c) { - case '\\': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_BACKSLASH); - break; - case '"': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_QUOTE); - break; - case '\b': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_B); - break; - case '\f': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_F); - break; - case '\n': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_N); - break; - case '\r': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_R); - break; - case '\t': - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - appendStringNode.execute(builder, T_ESC_T); - break; - default: - if (c <= 0x1f || (asciiOnly && c > '~')) { - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - chunkStart = currentIndex + 1; - if (c <= 0xffff) { - appendEscapedUtf16((char) c, builder, appendCodePointNode); - } else { - // split SMP codepoint to surrogate pair - appendEscapedUtf16((char) (0xD800 + ((c - 0x10000) >> 10)), builder, appendCodePointNode); - appendEscapedUtf16((char) (0xDC00 + ((c - 0x10000) & 0x3FF)), builder, appendCodePointNode); - } - } - break; + @InliningRoot + private static void appendLongString(TruffleString ts, TruffleStringBuilderUTF32 builder, boolean asciiOnly, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode1, + TruffleString.ByteIndexOfCodePointSetNode byteIndexOfCodePointSetNode2, + TruffleString.CodePointAtIndexUTF32Node codePointAtNode, + TruffleStringBuilder.AppendStringNode appendStringNode, + TruffleStringBuilder.AppendSubstringByteIndexNode appendSubstringNode, + TruffleString.FromByteArrayNode fromByteArrayNode, + int byteLength) { + int lastEscape = 0; + while (lastEscape < byteLength) { + int pos = asciiOnly + ? byteIndexOfCodePointSetNode1.execute(ts, lastEscape, byteLength, JSON_ESCAPE_CHARS_ASCII_ONLY) + : byteIndexOfCodePointSetNode2.execute(ts, lastEscape, byteLength, JSON_ESCAPE_CHARS); + int substringLength = (pos < 0 ? ts.byteLength(TS_ENCODING) : pos) - lastEscape; + if (substringLength > 0) { + appendSubstringNode.execute(builder, ts, lastEscape, substringLength); } - currentIndex++; + if (pos < 0) { + break; + } + appendStringNode.execute(builder, getEscaped(codePointAtNode.execute(ts, StringUtils.byteIndexToCodepointIndex(pos)), fromByteArrayNode)); + lastEscape = pos + 4; } - appendSubstring(builder, s, chunkStart, currentIndex, appendStringNode, substringNode); - appendCodePointNode.execute(builder, '"', 1, true); } - private static void appendSubstring(TruffleStringBuilder builder, TruffleString s, int startIndex, int endIndex, AppendStringNode appendStringNode, SubstringNode substringNode) { - if (startIndex < endIndex) { - appendStringNode.execute(builder, substringNode.execute(s, startIndex, endIndex - startIndex, TS_ENCODING, true)); - } + private static TruffleString getEscaped(int c, TruffleString.FromByteArrayNode fromByteArrayNode) { + return switch (c) { + case '\\' -> T_ESC_BACKSLASH; + case '"' -> T_ESC_QUOTE; + case '\b' -> T_ESC_B; + case '\f' -> T_ESC_F; + case '\n' -> T_ESC_N; + case '\r' -> T_ESC_R; + case '\t' -> T_ESC_T; + default -> fromByteArrayNode.execute(utf16Escape(c), TruffleString.Encoding.US_ASCII, false); + }; } - private static void appendEscapedUtf16(char c, TruffleStringBuilder builder, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { - appendCodePointNode.execute(builder, '\\', 1, true); - appendCodePointNode.execute(builder, 'u', 1, true); - appendCodePointNode.execute(builder, HEXDIGITS[(c >> 12) & 0xf], 1, true); - appendCodePointNode.execute(builder, HEXDIGITS[(c >> 8) & 0xf], 1, true); - appendCodePointNode.execute(builder, HEXDIGITS[(c >> 4) & 0xf], 1, true); - appendCodePointNode.execute(builder, HEXDIGITS[c & 0xf], 1, true); + private static byte[] utf16Escape(int c) { + if (c <= 0xffff) { + return new byte[]{'\\', 'u', HEXDIGITS[(c >> 12) & 0xf], HEXDIGITS[(c >> 8) & 0xf], HEXDIGITS[(c >> 4) & 0xf], HEXDIGITS[c & 0xf]}; + } else { + // split SMP codepoint to surrogate pair + char c1 = (char) (0xD800 + ((c - 0x10000) >> 10)); + char c2 = (char) (0xDC00 + ((c - 0x10000) & 0x3FF)); + return new byte[]{ + '\\', 'u', HEXDIGITS[(c1 >> 12) & 0xf], HEXDIGITS[(c1 >> 8) & 0xf], HEXDIGITS[(c1 >> 4) & 0xf], HEXDIGITS[c1 & 0xf], + '\\', 'u', HEXDIGITS[(c2 >> 12) & 0xf], HEXDIGITS[(c2 >> 8) & 0xf], HEXDIGITS[(c2 >> 4) & 0xf], HEXDIGITS[c2 & 0xf]}; + } } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/PJSONEncoder.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/PJSONEncoder.java index 7a01f530ab..fc69a59303 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/PJSONEncoder.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/json/PJSONEncoder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -42,6 +42,7 @@ import java.util.IdentityHashMap; +import com.oracle.graal.python.builtins.objects.PNone; import com.oracle.graal.python.builtins.objects.object.PythonBuiltinObject; import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -86,6 +87,13 @@ public PJSONEncoder(Object cls, Shape instanceShape, Object markers, Object defa this.fastEncode = fastEncode; } + public boolean isDefault() { + // skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, + // indent=None, separators=None, default=None + return !skipKeys && fastEncode == FastEncode.FastEncodeAscii && markers != null && allowNan && !sortKeys && indent == PNone.NONE && defaultFn == PNone.NONE; + + } + @TruffleBoundary void removeCircular(Object obj) { circular.remove(obj); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PPickler.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PPickler.java index 82341e09d4..21d27a0e36 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PPickler.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PPickler.java @@ -1416,7 +1416,7 @@ private void saveUnicode(VirtualFrame frame, PPickler pickler, Object obj) { if (pickler.isBin()) { writeUnicodeBinary(frame, pickler, obj); } else { - byte[] encoded = PickleUtils.rawUnicodeEscape(asStringStrict(obj), ensureTsCodePointLengthNode(), ensureTsCodePointAtIndexNode()); + byte[] encoded = PickleUtils.rawUnicodeEscape(asStringStrict(obj), ensureTsCodePointLengthNode(), ensureTsCodePointAtIndexUTF32Node()); write(pickler, PickleUtils.OPCODE_UNICODE); write(pickler, encoded); writeASCII(pickler, T_NEWLINE); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PickleUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PickleUtils.java index ba48abdb2c..683df9bc07 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PickleUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PickleUtils.java @@ -326,11 +326,11 @@ private static TruffleString decodeStrict(byte[] data, int len, TruffleString.Fr return switchEncodingNode.execute(ret, TS_ENCODING); } - public static byte[] rawUnicodeEscape(TruffleString unicode, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + public static byte[] rawUnicodeEscape(TruffleString unicode, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { int len = codePointLengthNode.execute(unicode, TS_ENCODING); ByteArrayBuffer buffer = new ByteArrayBuffer(len); for (int i = 0; i < len; i++) { - final int ch = codePointAtIndexNode.execute(unicode, i, TS_ENCODING); + final int ch = codePointAtIndexNode.execute(unicode, i); if (ch >= 0x10000) { // Map 32-bit characters to \Uxxxxxxxx buffer.append('\\'); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PicklerNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PicklerNodes.java index ad4de80b61..e838b83e36 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PicklerNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/pickle/PicklerNodes.java @@ -153,7 +153,7 @@ abstract static class BasePickleNode extends Node { @Child private PyObjectReprAsTruffleStringNode reprNode; @Child private TruffleString.FromByteArrayNode tsFromByteArrayNode; @Child private TruffleString.CodePointLengthNode tsCodePointLengthNode; - @Child private TruffleString.CodePointAtIndexNode tsCodePointAtIndexNode; + @Child private TruffleString.CodePointAtIndexUTF32Node tsCodePointAtIndexUTF32Node; @Child private TruffleString.FromLongNode tsFromLongNode; @Child private TruffleString.IndexOfStringNode tsIndexOfStringNode; @Child private TruffleString.SubstringNode tsSubstringNode; @@ -198,12 +198,12 @@ protected TruffleString.CodePointLengthNode ensureTsCodePointLengthNode() { return tsCodePointLengthNode; } - protected TruffleString.CodePointAtIndexNode ensureTsCodePointAtIndexNode() { - if (tsCodePointAtIndexNode == null) { + protected TruffleString.CodePointAtIndexUTF32Node ensureTsCodePointAtIndexUTF32Node() { + if (tsCodePointAtIndexUTF32Node == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - tsCodePointAtIndexNode = insert(TruffleString.CodePointAtIndexNode.create()); + tsCodePointAtIndexUTF32Node = insert(TruffleString.CodePointAtIndexUTF32Node.create()); } - return tsCodePointAtIndexNode; + return tsCodePointAtIndexUTF32Node; } protected TruffleString.FromLongNode ensureTsFromLongNode() { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/PatternBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/PatternBuiltins.java index 7bdf3df851..2dc57a4264 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/PatternBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/re/PatternBuiltins.java @@ -989,8 +989,7 @@ static Object doString(Node inliningTarget, VirtualFrame frame, PythonObject pat @Cached CastToTruffleStringNode cast, @Cached @Exclusive SubnInnerNode2 innerNode) { TruffleString input = cast.castKnownString(inliningTarget, inputObj); - assert TS_ENCODING == TruffleString.Encoding.UTF_32 : "remove the >> 2 when switching to UTF-8"; - int stringLength = input.byteLength(TS_ENCODING) >> 2; + int stringLength = StringUtils.byteIndexToCodepointIndex(input.byteLength(TS_ENCODING)); TruffleStringBuilderUTF32 result = TruffleStringBuilder.createUTF32(Math.max(32, stringLength)); return innerNode.execute(inliningTarget, frame, pattern, compiledRegex, compiledRegexMustAdvance, replacement, input, inputObj, count, false, isCallable, returnTuple, stringLength, result); @@ -1341,7 +1340,7 @@ abstract static class ParseReplacementNode extends Node { @Specialization static ParsedReplacement parseReplacement(Node inliningTarget, VirtualFrame frame, Object tregexCompiledRegex, TruffleString replacement, boolean binary, @Cached TruffleString.ByteIndexOfCodePointNode indexOfNode, - @Cached TruffleString.CodePointAtByteIndexNode codePointAtByteIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtNode, @Cached TruffleString.SubstringByteIndexNode substringByteIndexNode, @Cached TruffleString.GetCodeRangeNode getCodeRangeNode, @Cached TRegexUtil.InteropReadMemberNode readGroupCountNode, @@ -1370,9 +1369,9 @@ static ParsedReplacement parseReplacement(Node inliningTarget, VirtualFrame fram if (nextCPPos >= length) { throw raiseRegexErrorNode.execute(frame, BAD_ESCAPE_END_OF_STRING, replacement, toCodepointIndex(length, binary)); } - int firstCodepoint = codePointAtByteIndexNode.execute(replacement, nextCPPos, encoding); + int firstCodepoint = codePointAtNode.execute(replacement, toCodepointIndex(nextCPPos, binary)); nextCPPos += codepointLengthAscii; - int secondCodepoint = nextCPPos < length ? codePointAtByteIndexNode.execute(replacement, nextCPPos, encoding) : -1; + int secondCodepoint = nextCPPos < length ? codePointAtNode.execute(replacement, toCodepointIndex(nextCPPos, binary)) : -1; if (firstCodepoint == 'g') { if (secondCodepoint != '<') { throw raiseRegexErrorNode.execute(frame, MISSING_LEFT_ANGLE_BRACKET, replacement, toCodepointIndex(nextCPPos, binary)); @@ -1392,7 +1391,7 @@ static ParsedReplacement parseReplacement(Node inliningTarget, VirtualFrame fram if (ascii) { groupNumber = 0; for (int i = 0; i < nameLength; i += codepointLengthAscii) { - int d = codePointAtByteIndexNode.execute(name, i, encoding); + int d = codePointAtNode.execute(name, toCodepointIndex(i, binary)); if (isDecimalDigit(d)) { groupNumber = (groupNumber * 10) + digitValue(d); } else { @@ -1426,7 +1425,7 @@ static ParsedReplacement parseReplacement(Node inliningTarget, VirtualFrame fram nextCPPos += codepointLengthAscii; octalEscape = digitValue(secondCodepoint); if (nextCPPos < length) { - int thirdCodepoint = codePointAtByteIndexNode.execute(replacement, nextCPPos, encoding); + int thirdCodepoint = codePointAtNode.execute(replacement, toCodepointIndex(nextCPPos, binary)); if (isOctalDigit(thirdCodepoint)) { nextCPPos += codepointLengthAscii; octalEscape = (octalEscape * 8) + digitValue(thirdCodepoint); @@ -1443,7 +1442,7 @@ static ParsedReplacement parseReplacement(Node inliningTarget, VirtualFrame fram nextCPPos += codepointLengthAscii; int thirdCodepoint; if (Math.max(firstCodepoint, secondCodepoint) <= '7' && nextCPPos < length && - isOctalDigit(thirdCodepoint = codePointAtByteIndexNode.execute(replacement, nextCPPos, encoding))) { + isOctalDigit(thirdCodepoint = codePointAtNode.execute(replacement, toCodepointIndex(nextCPPos, binary)))) { nextCPPos += codepointLengthAscii; // Single and double-digit escapes are group references, but three-digit // escapes are octal character codes. Hopefully this will be deprecated @@ -1565,12 +1564,12 @@ static Object createSubstring(Node inliningTarget, TruffleString input, boolean private static int toByteIndex(int index, boolean binary) { assert TS_ENCODING == TruffleString.Encoding.UTF_32 : "remove this method when switching to UTF-8"; - return binary ? index : index << 2; + return binary ? index : StringUtils.codepointIndexToByteIndex(index); } - private static int toCodepointIndex(int i, boolean binary) { + private static int toCodepointIndex(int index, boolean binary) { assert TS_ENCODING == TruffleString.Encoding.UTF_32 : "remove this when switching to UTF-8"; - return binary ? i : i >> 2; + return binary ? index : StringUtils.byteIndexToCodepointIndex(index); } private static int digitValue(int d) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/array/ArrayBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/array/ArrayBuiltins.java index ae697190e4..402c0de7ab 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/array/ArrayBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/array/ArrayBuiltins.java @@ -162,6 +162,7 @@ import com.oracle.truffle.api.profiles.ValueProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; @CoreFunctions(extendClasses = PythonBuiltinClassType.PArray) @@ -366,7 +367,7 @@ abstract static class GetFormatCheckedNode extends Node { @Specialization static BufferFormat get(Node inliningTarget, TruffleString typeCode, @Cached TruffleString.CodePointLengthNode lengthNode, - @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Cached PRaiseNode raise, @Cached(value = "createIdentityProfile()", inline = false) ValueProfile valueProfile) { if (lengthNode.execute(typeCode, TS_ENCODING) != 1) { @@ -617,7 +618,7 @@ static TruffleString repr(VirtualFrame frame, PArray self, @Cached ArrayNodes.GetValueNode getValueNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, T_ARRAY); appendStringNode.execute(sb, T_LPAREN); appendStringNode.execute(sb, T_SINGLE_QUOTE); @@ -1397,7 +1398,7 @@ static TruffleString tounicode(PArray self, if (formatProfile.profile(inliningTarget, self.getFormat() != BufferFormat.UNICODE)) { throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.MAY_ONLY_BE_CALLED_ON_UNICODE_TYPE_ARRAYS); } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); int length = self.getLength(); for (int i = 0; i < length; i++) { appendStringNode.execute(sb, (TruffleString) getValueNode.execute(inliningTarget, self, i)); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/ByteArrayBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/ByteArrayBuiltins.java index 87c258a3db..4a3f2139cf 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/ByteArrayBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/ByteArrayBuiltins.java @@ -35,7 +35,6 @@ import static com.oracle.graal.python.runtime.exception.PythonErrorType.MemoryError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; import java.util.List; @@ -123,6 +122,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PByteArray) @HashNotImplemented @@ -405,7 +405,7 @@ static Object repr(PByteArray self, SequenceStorage store = self.getSequenceStorage(); byte[] bytes = getBytes.execute(inliningTarget, store); int len = store.length(); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); TruffleString typeName = getNameNode.execute(inliningTarget, getClassNode.execute(inliningTarget, self)); appendStringNode.execute(sb, typeName); appendCodePointNode.execute(sb, '(', 1, true); @@ -788,7 +788,7 @@ public static int alloc(PByteArray byteArray) { static Object commonReduce(int proto, byte[] bytes, int len, Object clazz, Object dict, PythonLanguage language, TruffleStringBuilder.AppendCodePointNode appendCodePointNode, TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); BytesUtils.repr(sb, bytes, len, appendCodePointNode); TruffleString str = toStringNode.execute(sb); Object contents; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesCommonBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesCommonBuiltins.java index d81cc3b2a1..be1e1476e3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesCommonBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesCommonBuiltins.java @@ -760,13 +760,13 @@ static byte pstring(Object strObj, @Bind Node inliningTarget, @Cached CastToTruffleStringNode toStr, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { TruffleString str = toStr.execute(inliningTarget, strObj); if (codePointLengthNode.execute(str, TS_ENCODING) != 1) { throw raiseNode.raise(inliningTarget, ValueError, SEP_MUST_BE_LENGTH_1); } - int cp = codePointAtIndexNode.execute(str, 0, TS_ENCODING); + int cp = codePointAtIndexNode.execute(str, 0); if (cp > 127) { throw raiseNode.raise(inliningTarget, ValueError, SEP_MUST_BE_ASCII); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesNodes.java index e38611a697..174262daa0 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesNodes.java @@ -130,6 +130,7 @@ import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleString.Encoding; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; public abstract class BytesNodes { @@ -1182,7 +1183,7 @@ public static TruffleString repr(Node inliningTarget, Object self, SequenceStorage store = getBytesStorage.execute(inliningTarget, self); byte[] bytes = getBytes.execute(inliningTarget, store); int len = store.length(); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); BytesUtils.reprLoop(sb, bytes, len, appendCodePointNode); return toStringNode.execute(sb); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesUtils.java index d3bf77b686..8b6c1f2bb2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/bytes/BytesUtils.java @@ -34,6 +34,7 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public final class BytesUtils { @@ -314,7 +315,7 @@ public static char figureOutQuote(byte[] bytes, int len) { return quote; } - public static void reprLoop(TruffleStringBuilder sb, byte[] bytes, int len, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { + public static void reprLoop(TruffleStringBuilderUTF32 sb, byte[] bytes, int len, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { char quote = figureOutQuote(bytes, len); appendCodePointNode.execute(sb, 'b', 1, true); appendCodePointNode.execute(sb, quote, 1, true); @@ -325,7 +326,7 @@ public static void reprLoop(TruffleStringBuilder sb, byte[] bytes, int len, Truf } @TruffleBoundary - private static void byteRepr(TruffleStringBuilder sb, byte b, boolean isSingleQuote, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { + private static void byteRepr(TruffleStringBuilderUTF32 sb, byte b, boolean isSingleQuote, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { if (b == '\t') { appendCodePointNode.execute(sb, '\\', 1, true); appendCodePointNode.execute(sb, 't', 1, true); @@ -354,7 +355,7 @@ private static void byteRepr(TruffleStringBuilder sb, byte b, boolean isSingleQu } } - public static void repr(TruffleStringBuilder sb, byte[] bytes, int len, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { + public static void repr(TruffleStringBuilderUTF32 sb, byte[] bytes, int len, TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { for (int i = 0; i < len; i++) { byteRepr(sb, bytes[i], true, appendCodePointNode); } @@ -367,7 +368,7 @@ public static String bytesRepr(byte[] bytes, int length) { len = bytes.length; } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); sb.appendCodePointUncached('b'); sb.appendCodePointUncached('\''); repr(sb, bytes, len, TruffleStringBuilder.AppendCodePointNode.getUncached()); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java index beb91d86ad..9f14234c87 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/cext/capi/CExtNodes.java @@ -765,9 +765,9 @@ public abstract static class CastToNativeLongNode extends PNodeWithContext { @Specialization(guards = "lengthNode.execute(value, TS_ENCODING) == 1", limit = "1") static long doString(TruffleString value, - @Cached TruffleString.CodePointAtIndexNode codepointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codepointAtIndexNode, @SuppressWarnings("unused") @Cached TruffleString.CodePointLengthNode lengthNode) { - return codepointAtIndexNode.execute(value, 0, TS_ENCODING); + return codepointAtIndexNode.execute(value, 0); } @Specialization diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/BufferStorageNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/BufferStorageNodes.java index 6a1ca05e49..4820d6256a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/BufferStorageNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/BufferStorageNodes.java @@ -346,11 +346,11 @@ static void packDouble(Node inliningTarget, @SuppressWarnings("unused") BufferFo @Shared @CachedLibrary(limit = "3") PythonBufferAccessLibrary bufferLib, @Cached StringNodes.CastToTruffleStringChecked0Node cast, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { TruffleString str = cast.cast(inliningTarget, object, ErrorMessages.ARRAY_ITEM_MUST_BE_UNICODE); if (codePointLengthNode.execute(str, TS_ENCODING) == 1) { - int codePoint = codePointAtIndexNode.execute(str, 0, TS_ENCODING); + int codePoint = codePointAtIndexNode.execute(str, 0); bufferLib.writeInt(buffer, offset, codePoint); } else { throw raiseNode.raise(inliningTarget, TypeError); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/deque/DequeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/deque/DequeBuiltins.java index c2bfae5d59..2e35ba8f06 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/deque/DequeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/deque/DequeBuiltins.java @@ -58,7 +58,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_ELLIPSIS_IN_BRACKETS; import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.toTruffleStringUncached; import java.util.Iterator; @@ -136,6 +135,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PDeque) @HashNotImplemented @@ -893,7 +893,7 @@ TruffleString repr(PDeque self) { Object[] items = self.data.toArray(); PList asList = PFactory.createList(PythonLanguage.get(null), items); int maxLength = self.getMaxLength(); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); sb.appendStringUncached(GetNameNode.executeUncached(GetPythonObjectClassNode.executeUncached(self))); sb.appendStringUncached(T_LPAREN); sb.appendStringUncached(PyObjectStrAsTruffleStringNode.executeUncached(asList)); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java index d3c50a45c9..177943346b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java @@ -50,7 +50,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RBRACE; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; import java.util.List; @@ -110,6 +109,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = {PythonBuiltinClassType.PDictKeysView, PythonBuiltinClassType.PDictItemsView, PythonBuiltinClassType.PDictValuesView, PythonBuiltinClassType.PDict}) public final class DictReprBuiltin extends PythonBuiltins { @@ -133,15 +133,15 @@ abstract static class ReprNode extends PythonUnaryBuiltinNode { @ValueType protected static final class ReprState { private final Object self; - private final TruffleStringBuilder result; + private final TruffleStringBuilderUTF32 result; private final int initialLength; private final boolean ellipsisInBraces; - ReprState(Object self, TruffleStringBuilder result) { + ReprState(Object self, TruffleStringBuilderUTF32 result) { this(self, result, true); } - ReprState(Object self, TruffleStringBuilder result, boolean ellipsisInBraces) { + ReprState(Object self, TruffleStringBuilderUTF32 result, boolean ellipsisInBraces) { this.self = self; this.result = result; this.ellipsisInBraces = ellipsisInBraces; @@ -309,7 +309,7 @@ public static TruffleString repr(Object dict, return T_ELLIPSIS_IN_BRACES; } try { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, T_LBRACE); var storage = getStorageNode.execute(inliningTarget, dict); forEachNode.execute(null, inliningTarget, storage, consumerNode, new ReprState(dict, sb)); @@ -361,7 +361,7 @@ public static TruffleString repr(PDictItemsView view, private static TruffleString viewRepr(Node inliningTarget, PDictView view, TruffleString type, HashingStorageForEach forEachNode, AbstractForEachRepr consumerNode, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, type); appendStringNode.execute(sb, T_LPAREN_BRACKET); HashingStorage dictStorage = view.getWrappedStorage(); @@ -396,10 +396,10 @@ public static void keyValue(Object key, Object value, ReprState s, @GenerateInline(false) // 116 -> 100 public abstract static class ReprOrderedDictItemsNode extends Node { - public abstract void execute(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb); + public abstract void execute(VirtualFrame frame, POrderedDict dict, TruffleStringBuilderUTF32 sb); @Specialization - static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb, + static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilderUTF32 sb, @Bind Node inliningTarget, @Cached PyObjectCallMethodObjArgs callMethod, @Cached PyObjectGetIter getIter, diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionBuiltins.java index 972674b9a8..2a0fc3b46b 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionBuiltins.java @@ -41,7 +41,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_EMPTY_STRING; import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import java.util.List; @@ -120,6 +119,7 @@ import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PBaseException) public final class BaseExceptionBuiltins extends PythonBuiltins { @@ -459,7 +459,7 @@ Object repr(VirtualFrame frame, Object self, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { Object type = getClassNode.execute(inliningTarget, self); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, castStringNode.execute(inliningTarget, getAttrNode.execute(frame, inliningTarget, type, T___NAME__))); PTuple args = getArgsNode.execute(inliningTarget, self); SequenceStorage argsStorage = args.getSequenceStorage(); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionGroupBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionGroupBuiltins.java index 240feaead9..43b16365cc 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionGroupBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/BaseExceptionGroupBuiltins.java @@ -48,7 +48,6 @@ import static com.oracle.graal.python.nodes.SpecialMethodNames.J___CLASS_GETITEM__; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; import static com.oracle.graal.python.runtime.exception.PythonErrorType.ValueError; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; import java.util.ArrayList; @@ -111,6 +110,7 @@ import com.oracle.truffle.api.profiles.InlinedLoopConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PBaseExceptionGroup) public class BaseExceptionGroupBuiltins extends PythonBuiltins { @@ -226,7 +226,7 @@ static TruffleString str(PBaseExceptionGroup self, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.AppendIntNumberNode appendIntNumberNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder builder = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(); appendStringNode.execute(builder, self.getMessage()); appendStringNode.execute(builder, T1); appendIntNumberNode.execute(builder, self.getExceptions().length); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/ExceptionNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/ExceptionNodes.java index 8739451e74..f987db30d5 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/ExceptionNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/ExceptionNodes.java @@ -83,6 +83,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public final class ExceptionNodes { private static Object nullToNone(Object obj) { @@ -447,7 +448,7 @@ public static TruffleString getMetaObjectName(Object self) { @TruffleBoundary private static TruffleString concat(TruffleString a, TruffleString b) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); sb.appendStringUncached(a); sb.appendStringUncached(T_COLON_SPACE); sb.appendStringUncached(b); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeEncodeErrorBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeEncodeErrorBuiltins.java index e0ee0e6fca..42456539ce 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeEncodeErrorBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeEncodeErrorBuiltins.java @@ -132,7 +132,7 @@ TruffleString str(VirtualFrame frame, PBaseException self, @Cached CastToTruffleStringNode toTruffleStringNode, @Cached PyObjectStrAsTruffleStringNode strNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached SimpleTruffleStringFormatNode simpleTruffleStringFormatNode) { if (self.getExceptionAttributes() == null) { // Not properly initialized. @@ -147,7 +147,7 @@ TruffleString str(VirtualFrame frame, PBaseException self, final TruffleString encoding = strNode.execute(frame, inliningTarget, attrNode.get(self, IDX_ENCODING, UNICODE_ERROR_ATTR_FACTORY)); final TruffleString reason = strNode.execute(frame, inliningTarget, attrNode.get(self, IDX_REASON, UNICODE_ERROR_ATTR_FACTORY)); if (start < codePointLengthNode.execute(object, TS_ENCODING) && end == start + 1) { - final int badChar = codePointAtIndexNode.execute(object, start, TS_ENCODING); + final int badChar = codePointAtIndexNode.execute(object, start); String badCharStr; if (badChar <= 0xFF) { badCharStr = PythonUtils.formatJString("\\x%02x", badChar); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeTranslateErrorBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeTranslateErrorBuiltins.java index e86e691d16..e487569429 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeTranslateErrorBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/exception/UnicodeTranslateErrorBuiltins.java @@ -125,7 +125,7 @@ TruffleString str(VirtualFrame frame, PBaseException self, @Cached BaseExceptionAttrNode attrNode, @Cached CastToTruffleStringNode toStringNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached PyObjectStrAsTruffleStringNode strNode, @Cached SimpleTruffleStringFormatNode simpleTruffleStringFormatNode) { if (self.getExceptionAttributes() == null) { @@ -140,7 +140,7 @@ TruffleString str(VirtualFrame frame, PBaseException self, final int end = attrNode.getInt(self, IDX_END, UNICODE_ERROR_ATTR_FACTORY); final TruffleString reason = strNode.execute(frame, inliningTarget, attrNode.get(self, IDX_REASON, UNICODE_ERROR_ATTR_FACTORY)); if (start < codePointLengthNode.execute(object, TS_ENCODING) && end == start + 1) { - final int badChar = codePointAtIndexNode.execute(object, start, TS_ENCODING); + final int badChar = codePointAtIndexNode.execute(object, start); String badCharStr; if (badChar <= 0xFF) { badCharStr = PythonUtils.formatJString("\\x%02x", badChar); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/floats/FloatUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/floats/FloatUtils.java index 2daac66bb1..5be05932fa 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/floats/FloatUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/floats/FloatUtils.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -236,6 +236,7 @@ public static StringToDoubleResult stringToDouble(String str, int start, int len /** * Parses a string that contains a valid string representation of a float number. */ + @TruffleBoundary public static double parseValidString(String substr) { double d = Double.parseDouble(substr); if (!Double.isFinite(d)) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/function/AbstractFunctionBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/function/AbstractFunctionBuiltins.java index c7dac3eea6..273a1b29b3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/function/AbstractFunctionBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/function/AbstractFunctionBuiltins.java @@ -92,7 +92,6 @@ import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; -import com.oracle.truffle.api.strings.TruffleStringBuilder; import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = {PythonBuiltinClassType.PFunction, PythonBuiltinClassType.PBuiltinFunction, PythonBuiltinClassType.WrapperDescriptor}) @@ -412,7 +411,7 @@ public static TruffleString signatureToText(Signature signature, boolean skipSel return sb.toStringUncached(); } - private static boolean appendCommaIfNeeded(TruffleStringBuilder sb, boolean first) { + private static boolean appendCommaIfNeeded(TruffleStringBuilderUTF32 sb, boolean first) { if (!first) { sb.appendStringUncached(T_COMMA_SPACE); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/list/ListBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/list/ListBuiltins.java index 04c21a8a6a..f8def72a7c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/list/ListBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/list/ListBuiltins.java @@ -45,11 +45,11 @@ import com.oracle.graal.python.PythonLanguage; import com.oracle.graal.python.annotations.ArgumentClinic; +import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.annotations.HashNotImplemented; import com.oracle.graal.python.annotations.Slot; import com.oracle.graal.python.annotations.Slot.SlotKind; import com.oracle.graal.python.annotations.Slot.SlotSignature; -import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.Python3Core; import com.oracle.graal.python.builtins.PythonBuiltinClassType; @@ -136,6 +136,7 @@ import com.oracle.truffle.api.profiles.InlinedLoopConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; /** @@ -216,7 +217,7 @@ public TruffleString repr(VirtualFrame frame, Object self, return T_ELLIPSIS_IN_BRACKETS; } try { - TruffleStringBuilder buf = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 buf = TruffleStringBuilder.createUTF32(); appendStringNode.execute(buf, T_LBRACKET); boolean initial = true; for (int index = 0; index < length; index++) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/memoryview/MemoryViewBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/memoryview/MemoryViewBuiltins.java index 109abe050e..b4d9d4b8b8 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/memoryview/MemoryViewBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/memoryview/MemoryViewBuiltins.java @@ -662,7 +662,7 @@ public abstract static class CastNode extends PythonTernaryClinicBuiltinNode { static PMemoryView cast(PMemoryView self, TruffleString formatString, @SuppressWarnings("unused") PNone none, @Bind Node inliningTarget, @Shared @Cached TruffleString.CodePointLengthNode lengthNode, - @Shared @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Shared @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { self.checkReleased(inliningTarget, raiseNode); return doCast(inliningTarget, self, formatString, 1, null, PythonContext.get(inliningTarget), lengthNode, atIndexNode, raiseNode); @@ -675,7 +675,7 @@ static PMemoryView cast(VirtualFrame frame, PMemoryView self, TruffleString form @Cached SequenceStorageNodes.GetItemScalarNode getItemScalarNode, @Cached PyNumberAsSizeNode asSizeNode, @Shared @Cached TruffleString.CodePointLengthNode lengthNode, - @Shared @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Shared @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { self.checkReleased(inliningTarget, raiseNode); SequenceStorage storage = getSequenceStorageNode.execute(inliningTarget, shapeObj); @@ -698,7 +698,7 @@ static PMemoryView error(PMemoryView self, TruffleString format, Object shape, } private static PMemoryView doCast(Node inliningTarget, PMemoryView self, TruffleString formatString, int ndim, int[] shape, PythonContext context, TruffleString.CodePointLengthNode lengthNode, - TruffleString.CodePointAtIndexNode atIndexNode, PRaiseNode raiseNode) { + TruffleString.CodePointAtIndexUTF32Node atIndexNode, PRaiseNode raiseNode) { if (!self.isCContiguous()) { throw raiseNode.raise(inliningTarget, TypeError, ErrorMessages.MEMORYVIEW_CASTS_RESTRICTED_TO_C_CONTIGUOUS); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/ClassmethodBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/ClassmethodBuiltins.java index 009c2e9ac3..36aa0478a9 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/ClassmethodBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/ClassmethodBuiltins.java @@ -71,6 +71,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PClassmethod) public final class ClassmethodBuiltins extends PythonBuiltins { @@ -123,7 +124,7 @@ Object repr(VirtualFrame frame, PDecoratedMethod self, @Cached TruffleStringBuilder.AppendStringNode append, @Cached TruffleStringBuilder.ToStringNode toString) { TruffleString callableRepr = repr.execute(frame, inliningTarget, self.getCallable()); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, PREFIX_LEN + callableRepr.byteLength(TS_ENCODING) + SUFFIX_LEN); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(PREFIX_LEN + callableRepr.byteLength(TS_ENCODING) + SUFFIX_LEN); append.execute(sb, PREFIX); append.execute(sb, callableRepr); append.execute(sb, SUFFIX); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/StaticmethodBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/StaticmethodBuiltins.java index fb55ab478a..34f245b6d3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/StaticmethodBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/method/StaticmethodBuiltins.java @@ -79,6 +79,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = {PythonBuiltinClassType.PStaticmethod}) public final class StaticmethodBuiltins extends PythonBuiltins { @@ -169,7 +170,7 @@ Object repr(VirtualFrame frame, PDecoratedMethod self, @Cached TruffleStringBuilder.AppendStringNode append, @Cached TruffleStringBuilder.ToStringNode toString) { TruffleString callableRepr = repr.execute(frame, inliningTarget, self.getCallable()); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, PREFIX_LEN + callableRepr.byteLength(TS_ENCODING) + SUFFIX_LEN); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(PREFIX_LEN + callableRepr.byteLength(TS_ENCODING) + SUFFIX_LEN); append.execute(sb, PREFIX); append.execute(sb, callableRepr); append.execute(sb, SUFFIX); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/namespace/SimpleNamespaceBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/namespace/SimpleNamespaceBuiltins.java index 6eb567531c..b9f21b53e9 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/namespace/SimpleNamespaceBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/namespace/SimpleNamespaceBuiltins.java @@ -48,7 +48,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_EQ; import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; import java.util.ArrayList; @@ -111,6 +110,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PSimpleNamespace) public final class SimpleNamespaceBuiltins extends PythonBuiltins { @@ -217,7 +217,7 @@ private void sortItemsByKey() { items.sort(Comparator.comparing(Pair::getLeft, StringUtils::compareStringsUncached)); } - public void appendToTruffleStringBuilder(TruffleStringBuilder sb, TruffleStringBuilder.AppendStringNode appendStringNode) { + public void appendToTruffleStringBuilder(TruffleStringBuilderUTF32 sb, TruffleStringBuilder.AppendStringNode appendStringNode) { sortItemsByKey(); for (int i = 0; i < items.size(); i++) { Pair item = items.get(i); @@ -295,7 +295,7 @@ public static Object repr(PSimpleNamespace ns, @Cached TruffleStringBuilder.ToStringNode toStringNode) { final Object klass = getClassNode.execute(inliningTarget, ns); final TruffleString name = clsProfile.profileClass(inliningTarget, klass, PythonBuiltinClassType.PSimpleNamespace) ? T_NAMESPACE : getNameNode.execute(inliningTarget, klass); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, name); appendStringNode.execute(sb, T_LPAREN); PythonContext ctxt = PythonContext.get(forEachNode); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/object/ObjectNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/object/ObjectNodes.java index a0d0c3755a..e627c95699 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/object/ObjectNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/object/ObjectNodes.java @@ -165,6 +165,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public abstract class ObjectNodes { @@ -768,7 +769,7 @@ static TruffleString get(VirtualFrame frame, Object cls, if (equalNode.execute(moduleName, BuiltinNames.T_BUILTINS, TS_ENCODING)) { return qualName; } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, moduleName); appendStringNode.execute(sb, T_DOT); appendStringNode.execute(sb, qualName); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/ordereddict/OrderedDictBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/ordereddict/OrderedDictBuiltins.java index 795f1e1d71..4e2ba068c1 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/ordereddict/OrderedDictBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/ordereddict/OrderedDictBuiltins.java @@ -56,7 +56,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RBRACE; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import java.util.List; @@ -126,6 +125,7 @@ import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.POrderedDict) public class OrderedDictBuiltins extends PythonBuiltins { @@ -512,7 +512,7 @@ static Object repr(VirtualFrame frame, POrderedDict self, @Cached ReprOrderedDictItemsNode reprDictItems) { TruffleString typeName = getNameNode.execute(inliningTarget, getClassNode.execute(inliningTarget, self)); if (self.first == null) { - TruffleStringBuilder builder = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(); appendStringNode.execute(builder, typeName); appendStringNode.execute(builder, T_EMPTY_PARENS); return toStringNode.execute(builder); @@ -522,7 +522,7 @@ static Object repr(VirtualFrame frame, POrderedDict self, return T_ELLIPSIS; } try { - TruffleStringBuilder builder = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(); appendStringNode.execute(builder, typeName); appendStringNode.execute(builder, T_LPAREN); appendStringNode.execute(builder, T_LBRACE); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/set/BaseSetBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/set/BaseSetBuiltins.java index 8078682ca9..4542c92bde 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/set/BaseSetBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/set/BaseSetBuiltins.java @@ -49,14 +49,13 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RBRACE; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import java.util.List; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.annotations.Slot; import com.oracle.graal.python.annotations.Slot.SlotKind; -import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.PythonBuiltins; @@ -111,6 +110,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = {PythonBuiltinClassType.PSet, PythonBuiltinClassType.PFrozenSet}) public final class BaseSetBuiltins extends PythonBuiltins { @@ -124,7 +124,7 @@ protected List> getNodeFa @Slot(value = SlotKind.tp_repr, isComplex = true) @GenerateNodeFactory abstract static class BaseReprNode extends PythonUnaryBuiltinNode { - private static void fillItems(VirtualFrame frame, Node inliningTarget, HashingStorage storage, TruffleStringBuilder sb, PyObjectReprAsTruffleStringNode repr, + private static void fillItems(VirtualFrame frame, Node inliningTarget, HashingStorage storage, TruffleStringBuilderUTF32 sb, PyObjectReprAsTruffleStringNode repr, HashingStorageGetIterator getIter, HashingStorageIteratorNext iterNext, HashingStorageIteratorKey iterKey, TruffleStringBuilder.AppendStringNode appendStringNode) { boolean first = true; @@ -153,7 +153,7 @@ public static Object repr(VirtualFrame frame, PBaseSet self, @Cached HashingStorageIteratorKey iteratorKey, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); Object clazz = getClassNode.execute(inliningTarget, self); PythonContext ctxt = PythonContext.get(getNameNode); int len = lenNode.execute(inliningTarget, self.getDictStorage()); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java index 0896380b49..ba1c90c833 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/PString.java @@ -165,12 +165,12 @@ TruffleString asTruffleString( Object readArrayElement(long index, @Bind Node inliningTarget, @Cached CastToTruffleStringNode cast, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("gil") @Cached GilNode gil) { boolean mustRelease = gil.acquire(); try { try { - return codePointAtIndexNode.execute(cast.execute(inliningTarget, this), (int) index, TS_ENCODING); + return codePointAtIndexNode.execute(cast.execute(inliningTarget, this), (int) index); } catch (CannotCastException e) { throw CompilerDirectives.shouldNotReachHere("A PString should always have an underlying CharSequence"); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java index cb14d670cc..3d7ac92264 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java @@ -199,6 +199,7 @@ import com.oracle.truffle.api.strings.TruffleString.IndexOfStringNode; import com.oracle.truffle.api.strings.TruffleString.LastIndexOfStringNode; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; /** @@ -1029,7 +1030,7 @@ static PDict doDict(VirtualFrame frame, Object cls, PDict from, Object to, Objec @Bind Node inliningTarget, @Exclusive @Cached CastToTruffleStringChecked0Node cast, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Exclusive @Cached HashingStorageSetItem setHashingStorageItem, @Cached HashingStorageLen lenNode, @Cached HashingStorageGetIterator getIter, @@ -1050,7 +1051,7 @@ static PDict doDict(VirtualFrame frame, Object cls, PDict from, Object to, Objec if (codePointLengthNode.execute(strKey, TS_ENCODING) != 1) { throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.STRING_KEYS_MUST_BE_LENGTH_1); } - int codePoint = codePointAtIndexNode.execute(strKey, 0, TS_ENCODING); + int codePoint = codePointAtIndexNode.execute(strKey, 0); destStorage = setHashingStorageItem.execute(frame, inliningTarget, destStorage, codePoint, currentValue); } } @@ -1072,18 +1073,18 @@ public abstract static class TranslateNode extends PythonBuiltinNode { @Specialization static TruffleString doStringString(TruffleString self, TruffleString table, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("createCpIterator") @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode, @Shared("next") @Cached TruffleStringIterator.NextNode nextNode, @Shared("appendCp") @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Shared("toString") @Cached TruffleStringBuilder.ToStringNode toStringNode) { int tableLen = codePointLengthNode.execute(table, TS_ENCODING); TruffleStringIterator it = createCodePointIteratorNode.execute(self, TS_ENCODING); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, self.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(self.byteLength(TS_ENCODING)); while (it.hasNext()) { int cp = nextNode.execute(it, TS_ENCODING); if (cp >= 0 && cp < tableLen) { - cp = codePointAtIndexNode.execute(table, cp, TS_ENCODING); + cp = codePointAtIndexNode.execute(table, cp); } appendCodePointNode.execute(sb, cp, 1, true); } @@ -1103,7 +1104,7 @@ static TruffleString doGeneric(VirtualFrame frame, Object self, Object table, @Shared("appendCp") @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Shared("toString") @Cached TruffleStringBuilder.ToStringNode toStringNode) { TruffleString selfStr = castSelfNode.cast(inliningTarget, self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "translate", self); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, selfStr.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(selfStr.byteLength(TS_ENCODING)); TruffleStringIterator it = createCodePointIteratorNode.execute(selfStr, TS_ENCODING); while (it.hasNext()) { int original = nextNode.execute(it, TS_ENCODING); @@ -1259,7 +1260,7 @@ protected ArgumentClinicProvider getArgumentClinic() { @SuppressWarnings("unused") static PList doStringNoSep(TruffleString self, PNone sep, int maxsplit, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode, @Shared("appendNode") @Cached AppendNode appendNode) { return splitfields(self, maxsplit, appendNode, codePointLengthNode, codePointAtIndexNode, substringNode); @@ -1297,7 +1298,7 @@ static PList doStringSep(TruffleString self, TruffleString sep, int maxsplit, // See {@link PyString} private static PList splitfields(TruffleString s, int maxsplit, AppendNode appendNode, TruffleString.CodePointLengthNode codePointLengthNode, - TruffleString.CodePointAtIndexNode codePointAtIndexNode, + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, TruffleString.SubstringNode substringNode) { /* * Result built here is a list of split parts, exactly as required for s.split(None, @@ -1319,7 +1320,7 @@ private static PList splitfields(TruffleString s, int maxsplit, AppendNode appen while (start < length) { // Find the next occurrence of non-whitespace while (start < length) { - if (!StringUtils.isSpace(codePointAtIndexNode.execute(s, start, TS_ENCODING))) { + if (!StringUtils.isSpace(codePointAtIndexNode.execute(s, start))) { // Break leaving start pointing at non-whitespace break; } @@ -1337,7 +1338,7 @@ private static PList splitfields(TruffleString s, int maxsplit, AppendNode appen } else { // The next segment runs up to the next next whitespace or end for (index = start; index < length; index++) { - if (StringUtils.isSpace(codePointAtIndexNode.execute(s, index, TS_ENCODING))) { + if (StringUtils.isSpace(codePointAtIndexNode.execute(s, index))) { // Break leaving index pointing at whitespace break; } @@ -1412,7 +1413,7 @@ static PList doStringMaxsplit(VirtualFrame frame, TruffleString s, @SuppressWarn @Shared("appendNode") @Cached AppendNode appendNode, @Shared("reverseNode") @Cached ListReverseNode reverseNode, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared @Cached TruffleString.SubstringNode substringNode) { /* * Result built here is a list of split parts, exactly as required for s.split(None, @@ -1433,7 +1434,7 @@ static PList doStringMaxsplit(VirtualFrame frame, TruffleString s, @SuppressWarn int start = 0, end = length, splits = 0; for (int i = length - 1; i >= 0; i--) { - if (StringUtils.isSpace(codePointAtIndexNode.execute(s, i, TS_ENCODING))) { + if (StringUtils.isSpace(codePointAtIndexNode.execute(s, i))) { if (hasSegment) { appendNode.execute(list, substringNode.execute(s, start, end - start, TS_ENCODING, false)); hasSegment = false; @@ -1511,20 +1512,20 @@ static PList doStringKeepends(Node inliningTarget, TruffleString self, boolean k Object regexResult = invokeExecMethodNode.execute(inliningTarget, lineBreakRegex, self, lastEnd); matchFound = readIsMatchNode.execute(inliningTarget, regexResult); // TRegex reports UTF-32 matches as int indices - final int substringStartByteIndex = asByteIndex(lastEnd); + final int substringStartByteIndex = StringUtils.codepointIndexToByteIndex(lastEnd); final int substringByteLength; if (matchFound) { int end = TRegexUtil.TRegexResultAccessor.captureGroupEnd(regexResult, 0, inliningTarget, getEndNode); if (keepends) { - substringByteLength = asByteIndex(end - lastEnd); + substringByteLength = StringUtils.codepointIndexToByteIndex(end - lastEnd); } else { int start = TRegexUtil.TRegexResultAccessor.captureGroupStart(regexResult, 0, inliningTarget, getStartNode); - substringByteLength = asByteIndex(start - lastEnd); + substringByteLength = StringUtils.codepointIndexToByteIndex(start - lastEnd); } assert end > lastEnd : String.format("end: %d, lastEnd: %d", end, lastEnd); lastEnd = end; } else { - substringByteLength = self.byteLength(TS_ENCODING) - asByteIndex(lastEnd); + substringByteLength = self.byteLength(TS_ENCODING) - StringUtils.codepointIndexToByteIndex(lastEnd); if (substringByteLength == 0) { break; } @@ -1535,10 +1536,6 @@ static PList doStringKeepends(Node inliningTarget, TruffleString self, boolean k return list; } - private static int asByteIndex(int tregexResultIndex) { - assert TS_ENCODING == Encoding.UTF_32 : "byte index must be adapted when changing the language string encoding"; - return tregexResultIndex << 2; - } } } @@ -1585,7 +1582,7 @@ public abstract static class StripNode extends PythonBinaryBuiltinNode { @Specialization static TruffleString doStringString(TruffleString self, TruffleString chars, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("indexOf") @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { return StringUtils.strip(self, chars, StripKind.BOTH, codePointLengthNode, codePointAtIndexNode, indexOfCodePointNode, substringNode); @@ -1594,7 +1591,7 @@ static TruffleString doStringString(TruffleString self, TruffleString chars, @Specialization static TruffleString doStringNone(TruffleString self, @SuppressWarnings("unused") PNone chars, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { return StringUtils.strip(self, StripKind.BOTH, codePointLengthNode, codePointAtIndexNode, substringNode); } @@ -1605,7 +1602,7 @@ static TruffleString doGeneric(Object self, Object chars, @Cached CastToTruffleStringChecked2Node castSelfNode, @Cached CastToTruffleStringChecked2Node castCharsNode, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("indexOf") @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { TruffleString selfStr = castSelfNode.cast(inliningTarget, self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "strip", self); @@ -1623,7 +1620,7 @@ public abstract static class RStripNode extends PythonBinaryBuiltinNode { @Specialization static TruffleString doStringString(TruffleString self, TruffleString chars, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("indexOf") @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { return StringUtils.strip(self, chars, StripKind.RIGHT, codePointLengthNode, codePointAtIndexNode, indexOfCodePointNode, substringNode); @@ -1632,7 +1629,7 @@ static TruffleString doStringString(TruffleString self, TruffleString chars, @Specialization static TruffleString doStringNone(TruffleString self, @SuppressWarnings("unused") PNone chars, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { return StringUtils.strip(self, StripKind.RIGHT, codePointLengthNode, codePointAtIndexNode, substringNode); } @@ -1643,7 +1640,7 @@ static TruffleString doGeneric(Object self, Object chars, @Cached CastToTruffleStringChecked2Node castSelfNode, @Cached CastToTruffleStringChecked2Node castCharsNode, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("indexOf") @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { TruffleString selfStr = castSelfNode.cast(inliningTarget, self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "rstrip", self); @@ -1661,7 +1658,7 @@ public abstract static class LStripNode extends PythonBuiltinNode { @Specialization static TruffleString doStringString(TruffleString self, TruffleString chars, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("indexOf") @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { return StringUtils.strip(self, chars, StripKind.LEFT, codePointLengthNode, codePointAtIndexNode, indexOfCodePointNode, substringNode); @@ -1670,7 +1667,7 @@ static TruffleString doStringString(TruffleString self, TruffleString chars, @Specialization static TruffleString doStringNone(TruffleString self, @SuppressWarnings("unused") PNone chars, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { return StringUtils.strip(self, StripKind.LEFT, codePointLengthNode, codePointAtIndexNode, substringNode); } @@ -1681,7 +1678,7 @@ static TruffleString doGeneric(Object self, Object chars, @Cached CastToTruffleStringChecked2Node castSelfNode, @Cached CastToTruffleStringChecked2Node castCharsNode, @Shared("cpLen") @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Shared("indexOf") @Cached TruffleString.IndexOfCodePointNode indexOfCodePointNode, @Shared("substring") @Cached TruffleString.SubstringNode substringNode) { TruffleString selfStr = castSelfNode.cast(inliningTarget, self, ErrorMessages.REQUIRES_STR_OBJECT_BUT_RECEIVED_P, "lstrip", self); @@ -2198,7 +2195,7 @@ static TruffleString doGeneric(VirtualFrame frame, Object selfObj, Object widthO @Cached CastToTruffleStringChecked2Node castSelfNode, @Cached PyNumberAsSizeNode asSizeNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleString.SubstringNode substringNode, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @@ -2210,9 +2207,9 @@ static TruffleString doGeneric(VirtualFrame frame, Object selfObj, Object widthO return self; } int nzeros = width - len; - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, tsbCapacity(nzeros + len)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(tsbCapacity(nzeros + len)); if (len > 0) { - int start = codePointAtIndexNode.execute(self, 0, TS_ENCODING); + int start = codePointAtIndexNode.execute(self, 0); if (start == '+' || start == '-') { appendCodePointNode.execute(sb, start, 1, true); if (nzeros > 0) { @@ -2251,7 +2248,7 @@ static TruffleString doString(TruffleString self, @Cached TruffleString.ToJavaStringNode toJavaStringNode, @Cached TruffleString.FromJavaStringNode fromJavaStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, self.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(self.byteLength(TS_ENCODING)); TruffleStringIterator it = createCodePointIteratorNode.execute(self, TS_ENCODING); int start = 0; int end = 0; @@ -2274,7 +2271,7 @@ static TruffleString doString(TruffleString self, } private static void appendSegment(TruffleString self, TruffleStringBuilder.AppendStringNode appendStringNode, TruffleString.SubstringNode substringNode, - TruffleString.ToJavaStringNode toJavaStringNode, TruffleString.FromJavaStringNode fromJavaStringNode, TruffleStringBuilder sb, int start, int end) { + TruffleString.ToJavaStringNode toJavaStringNode, TruffleString.FromJavaStringNode fromJavaStringNode, TruffleStringBuilderUTF32 sb, int start, int end) { TruffleString segment = substringNode.execute(self, start, end - start + 1, TS_ENCODING, true); String titleSegment = UCharacter.toTitleCase(Locale.ROOT, toJavaStringNode.execute(segment), null); appendStringNode.execute(sb, fromJavaStringNode.execute(titleSegment, TS_ENCODING)); @@ -2298,7 +2295,7 @@ TruffleString doIt(VirtualFrame frame, Object selfObj, Object width, Object fill @Cached PyNumberAsSizeNode asSizeNode, @Cached CastToTruffleStringChecked1Node castFillNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode, @@ -2313,7 +2310,7 @@ TruffleString doIt(VirtualFrame frame, Object selfObj, Object width, Object fill if (errorProfile.profile(inliningTarget, codePointLengthNode.execute(fillStr, TS_ENCODING) != 1)) { throw raiseNode.raise(inliningTarget, TypeError, ErrorMessages.FILL_CHAR_MUST_BE_LENGTH_1); } - fillChar = codePointAtIndexNode.execute(fillStr, 0, TS_ENCODING); + fillChar = codePointAtIndexNode.execute(fillStr, 0); } return make(self, asSizeNode.executeExact(frame, inliningTarget, width), fillChar, codePointLengthNode, appendCodePointNode, appendStringNode, toStringNode); } @@ -2326,7 +2323,7 @@ private TruffleString make(TruffleString self, int width, int fillChar, TruffleS } int left = getLeftPaddingWidth(len, width); int right = width - len - left; - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, tsbCapacity(len + left + right)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(tsbCapacity(len + left + right)); if (left > 0) { appendCodePointNode.execute(sb, fillChar, left, true); } @@ -2404,14 +2401,14 @@ static TruffleString doGenericCachedStep(TruffleString value, SliceInfo slice, @Shared("len") @Cached LenOfRangeNode sliceLen, @Shared("appendCP") @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Shared("toStr") @Cached TruffleStringBuilder.ToStringNode toStringNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { int len = sliceLen.len(inliningTarget, slice); int start = slice.start; - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, tsbCapacity(len)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(tsbCapacity(len)); int j = 0; loopProfile.profileCounted(inliningTarget, len); for (int i = start; loopProfile.inject(inliningTarget, j < len); i += step) { - appendCodePointNode.execute(sb, codePointAtIndexNode.execute(value, i, TS_ENCODING), 1, true); + appendCodePointNode.execute(sb, codePointAtIndexNode.execute(value, i), 1, true); j++; } return toStringNode.execute(sb); @@ -2424,7 +2421,7 @@ static TruffleString doGeneric(TruffleString value, SliceInfo slice, @Shared("len") @Cached LenOfRangeNode sliceLen, @Shared("appendCP") @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Shared("toStr") @Cached TruffleStringBuilder.ToStringNode toStringNode, - @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Shared("cpAtIndex") @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { return doGenericCachedStep(value, slice, inliningTarget, slice.step, loopProfile, sliceLen, appendCodePointNode, toStringNode, codePointAtIndexNode); } } @@ -2610,7 +2607,7 @@ static TruffleString doString(TruffleString self, int tabsize, @Cached TruffleStringIterator.NextNode nextNode, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, self.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(self.byteLength(TS_ENCODING)); int linePos = 0; TruffleStringIterator it = createCodePointIteratorNode.execute(self, TS_ENCODING); // It's ok to iterate with charAt, we just pass surrogates through diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java index ecdf5470b9..1f7217c5bd 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringNodes.java @@ -99,6 +99,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; public abstract class StringNodes { @@ -394,7 +395,7 @@ static TruffleString doString(TruffleString self, TruffleString arg, if (arg.isEmpty()) { return T_EMPTY_STRING; } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); TruffleStringIterator it = createCodePointIteratorNode.execute(arg, TS_ENCODING); assert it.hasNext(); appendCodePointNode.execute(sb, nextNode.execute(it, TS_ENCODING), 1, true); @@ -438,7 +439,7 @@ static TruffleString doPSequence(TruffleString self, PSequence sequence, if (isSingleItemProfile.profile(inliningTarget, len == 1)) { return castToStringNode.execute(inliningTarget, item); } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, castToStringNode.execute(inliningTarget, item)); for (i = 1; i < len; i++) { @@ -472,7 +473,7 @@ static TruffleString doGeneric(VirtualFrame frame, TruffleString string, Object throw raise.raise(inliningTarget, PythonBuiltinClassType.TypeError, ErrorMessages.CAN_ONLY_JOIN_ITERABLE); } try { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); Object next; try { next = nextNode.execute(frame, inliningTarget, iterator); @@ -514,15 +515,15 @@ static boolean isExactlyListOrTuple(Node inliningTarget, GetClassNode getClassNo @GenerateInline(false) // footprint reduction 36 -> 17 public abstract static class SpliceNode extends PNodeWithContext { - public abstract void execute(TruffleStringBuilder sb, Object translated); + public abstract void execute(TruffleStringBuilderUTF32 sb, Object translated); @Specialization(guards = "isNone(none)") @SuppressWarnings("unused") - static void doNone(TruffleStringBuilder sb, PNone none) { + static void doNone(TruffleStringBuilderUTF32 sb, PNone none) { } @Specialization - static void doInt(TruffleStringBuilder sb, int translated, + static void doInt(TruffleStringBuilderUTF32 sb, int translated, @Bind Node inliningTarget, @Shared("raise") @Cached PRaiseNode raise, @Shared @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { @@ -534,7 +535,7 @@ static void doInt(TruffleStringBuilder sb, int translated, } @Specialization - static void doLong(TruffleStringBuilder sb, long translated, + static void doLong(TruffleStringBuilderUTF32 sb, long translated, @Bind Node inliningTarget, @Shared("raise") @Cached PRaiseNode raise, @Shared @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { @@ -546,7 +547,7 @@ static void doLong(TruffleStringBuilder sb, long translated, } @Specialization - static void doPInt(TruffleStringBuilder sb, PInt translated, + static void doPInt(TruffleStringBuilderUTF32 sb, PInt translated, @Bind Node inliningTarget, @Shared("raise") @Cached PRaiseNode raise, @Shared @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode) { @@ -558,13 +559,13 @@ static void doPInt(TruffleStringBuilder sb, PInt translated, } @Specialization - static void doString(TruffleStringBuilder sb, TruffleString translated, + static void doString(TruffleStringBuilderUTF32 sb, TruffleString translated, @Shared @Cached TruffleStringBuilder.AppendStringNode appendStringNode) { appendStringNode.execute(sb, translated); } @Specialization(guards = {"!isInteger(translated)", "!isPInt(translated)", "!isNone(translated)"}) - static void doObject(TruffleStringBuilder sb, Object translated, + static void doObject(TruffleStringBuilderUTF32 sb, Object translated, @Bind Node inliningTarget, @Exclusive @Cached PRaiseNode raise, @Cached CastToTruffleStringNode castToStringNode, @@ -662,7 +663,7 @@ static TruffleString doReplace(TruffleString self, TruffleString old, TruffleStr } int selfLen = self.byteLength(TS_ENCODING); int selfCpLen = codePointLengthNode.execute(self, TS_ENCODING); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, selfLen + with.byteLength(TS_ENCODING) * Math.min(maxCount, selfCpLen + 1)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(selfLen + with.byteLength(TS_ENCODING) * Math.min(maxCount, selfCpLen + 1)); int replacements = 0; TruffleStringIterator it = createCodePointIteratorNode.execute(self, TS_ENCODING); int i = 0; @@ -688,7 +689,7 @@ static TruffleString doReplace(TruffleString self, TruffleString old, TruffleStr if (idx < 0) { return self; } else { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); int start = 0; int replacements = 0; do { @@ -731,7 +732,7 @@ static TruffleString doString(TruffleString self, boolean hasDoubleQuote = indexOfCodePointNode.execute(self, '"', 0, selfLen, TS_ENCODING) >= 0; boolean useDoubleQuotes = hasSingleQuote && !hasDoubleQuote; - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, tsbCapacity(selfLen + 2)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(tsbCapacity(selfLen + 2)); TruffleStringIterator it = createCodePointIteratorNode.execute(self, TS_ENCODING); byte[] buffer = new byte[12]; appendCodePointNode.execute(sb, useDoubleQuotes ? '"' : '\'', 1, true); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java index d36e0d85da..ddb1db140c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringUtils.java @@ -48,8 +48,6 @@ import java.util.List; import java.util.Locale; -import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; -import com.oracle.truffle.api.dsl.Fallback; import org.graalvm.nativeimage.ImageInfo; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacter; import org.graalvm.shadowed.com.ibm.icu.lang.UCharacterCategory; @@ -57,8 +55,10 @@ import com.oracle.graal.python.util.PythonUtils; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Cached.Shared; +import com.oracle.truffle.api.dsl.Fallback; import com.oracle.truffle.api.dsl.GenerateCached; import com.oracle.truffle.api.dsl.GenerateInline; import com.oracle.truffle.api.dsl.GenerateUncached; @@ -69,6 +69,7 @@ import com.oracle.truffle.api.strings.TruffleStringBuilder.AppendCodePointNode; import com.oracle.truffle.api.strings.TruffleStringBuilder.AppendLongNumberNode; import com.oracle.truffle.api.strings.TruffleStringBuilder.AppendStringNode; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; import com.oracle.truffle.regex.chardata.UnicodeCharacterAliases; @@ -157,13 +158,13 @@ public static boolean isSpace(int ch) { return isUnicodeWhitespace(ch); } - public static TruffleString strip(TruffleString str, StripKind stripKind, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexNode codePointAtIndexNode, + public static TruffleString strip(TruffleString str, StripKind stripKind, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, TruffleString.SubstringNode substringNode) { int i = 0; int len = codePointLengthNode.execute(str, TS_ENCODING); if (stripKind != StripKind.RIGHT) { while (i < len) { - int cp = codePointAtIndexNode.execute(str, i, TS_ENCODING); + int cp = codePointAtIndexNode.execute(str, i); if (!isSpace(cp)) { break; } @@ -175,7 +176,7 @@ public static TruffleString strip(TruffleString str, StripKind stripKind, Truffl if (stripKind != StripKind.LEFT) { j--; while (j >= i) { - int cp = codePointAtIndexNode.execute(str, j, TS_ENCODING); + int cp = codePointAtIndexNode.execute(str, j); if (!isSpace(cp)) { break; } @@ -188,7 +189,7 @@ public static TruffleString strip(TruffleString str, StripKind stripKind, Truffl } public static TruffleString strip(TruffleString str, TruffleString chars, StripKind stripKind, TruffleString.CodePointLengthNode codePointLengthNode, - TruffleString.CodePointAtIndexNode codePointAtIndexNode, TruffleString.IndexOfCodePointNode indexOfCodePointNode, TruffleString.SubstringNode substringNode) { + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, TruffleString.IndexOfCodePointNode indexOfCodePointNode, TruffleString.SubstringNode substringNode) { int i = 0; int len = codePointLengthNode.execute(str, TS_ENCODING); int charsLen = codePointLengthNode.execute(chars, TS_ENCODING); @@ -196,7 +197,7 @@ public static TruffleString strip(TruffleString str, TruffleString chars, StripK // to avoid the linear search in chars if (stripKind != StripKind.RIGHT) { while (i < len) { - int cp = codePointAtIndexNode.execute(str, i, TS_ENCODING); + int cp = codePointAtIndexNode.execute(str, i); if (indexOfCodePointNode.execute(chars, cp, 0, charsLen, TS_ENCODING) < 0) { break; } @@ -208,7 +209,7 @@ public static TruffleString strip(TruffleString str, TruffleString chars, StripK if (stripKind != StripKind.LEFT) { j--; while (j >= i) { - int cp = codePointAtIndexNode.execute(str, j, TS_ENCODING); + int cp = codePointAtIndexNode.execute(str, j); if (indexOfCodePointNode.execute(chars, cp, 0, charsLen, TS_ENCODING) < 0) { break; } @@ -365,7 +366,7 @@ public static TruffleString joinUncached(TruffleString delimiter, Iterable> 2; + } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/StructSequenceBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/StructSequenceBuiltins.java index 56535627fc..8644f8e07a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/StructSequenceBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/StructSequenceBuiltins.java @@ -47,7 +47,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; import static com.oracle.graal.python.runtime.exception.PythonErrorType.NotImplementedError; import static com.oracle.graal.python.util.PythonUtils.EMPTY_TRUFFLESTRING_ARRAY; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import java.util.ArrayList; import java.util.List; @@ -98,6 +97,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = { PythonBuiltinClassType.PStatResult, @@ -199,7 +199,7 @@ static TruffleString repr(VirtualFrame frame, PTuple self, @Cached TruffleStringBuilder.AppendStringNode appendStringNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { Object type = getClassNode.execute(inliningTarget, self); - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); appendStringNode.execute(sb, getQName.execute(frame, type)); appendStringNode.execute(sb, T_LPAREN); SequenceStorage tupleStore = self.getSequenceStorage(); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/TupleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/TupleBuiltins.java index 734ea5833d..99b702cbf2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/TupleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/tuple/TupleBuiltins.java @@ -35,7 +35,6 @@ import static com.oracle.graal.python.nodes.StringLiterals.T_LPAREN; import static com.oracle.graal.python.nodes.StringLiterals.T_RPAREN; import static com.oracle.graal.python.runtime.exception.PythonErrorType.TypeError; -import static com.oracle.graal.python.util.PythonUtils.TS_ENCODING; import static com.oracle.graal.python.util.PythonUtils.tsLiteral; import java.util.List; @@ -114,6 +113,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PTuple) public final class TupleBuiltins extends PythonBuiltins { @@ -268,7 +268,7 @@ public static TruffleString repr(VirtualFrame frame, Object self, return T_ELLIPSIS_IN_PARENS; } try { - TruffleStringBuilder buf = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 buf = TruffleStringBuilder.createUTF32(); appendStringNode.execute(buf, T_LPAREN); for (int i = 0; i < len - 1; i++) { appendStringNode.execute(buf, toString(frame, inliningTarget, getItemNode.execute(tupleStore, i), reprNode)); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/PythonManagedClass.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/PythonManagedClass.java index 0e4e474cbd..9a5692e6b7 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/PythonManagedClass.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/PythonManagedClass.java @@ -57,7 +57,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.strings.TruffleString; -import com.oracle.truffle.api.strings.TruffleString.CodePointAtIndexNode; +import com.oracle.truffle.api.strings.TruffleString.CodePointAtIndexUTF32Node; import com.oracle.truffle.api.strings.TruffleString.CodePointLengthNode; public abstract class PythonManagedClass extends PythonObject implements PythonAbstractClass { @@ -219,7 +219,7 @@ public void setAttribute(TruffleString key, Object value) { public void onAttributeUpdate(TruffleString key, Object value) { callOnAttributeUpdateOnSubclasses(subClasses, key, value); methodResolutionOrder.invalidateFinalAttributeAssumption(key); - if (TpSlots.canBeSpecialMethod(key, CodePointLengthNode.getUncached(), CodePointAtIndexNode.getUncached())) { + if (TpSlots.canBeSpecialMethod(key, CodePointLengthNode.getUncached(), CodePointAtIndexUTF32Node.getUncached())) { if (this.tpSlots != null) { // This is called during type instantiation from copyDictSlots when the tp slots are // not initialized yet @@ -233,7 +233,7 @@ public static void onAttributeUpdateNative(PythonAbstractNativeObject nativeClas assert TypeNodes.IsTypeNode.executeUncached(nativeClass); callOnAttributeUpdateOnSubclasses(GetSubclassesNode.executeUncached(nativeClass), key, value); TypeNodes.GetMroStorageNode.executeUncached(nativeClass).invalidateFinalAttributeAssumption(key); - if (TpSlots.canBeSpecialMethod(key, CodePointLengthNode.getUncached(), CodePointAtIndexNode.getUncached())) { + if (TpSlots.canBeSpecialMethod(key, CodePointLengthNode.getUncached(), CodePointAtIndexUTF32Node.getUncached())) { TpSlots.updateSlot(nativeClass, key); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/TpSlots.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/TpSlots.java index e1c1a6e142..1f8d100762 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/TpSlots.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/type/TpSlots.java @@ -1463,10 +1463,10 @@ public static TpSlots.Builder buildInherited(PythonClass klass, MroSequenceStora return klassSlots; } - public static boolean canBeSpecialMethod(TruffleString name, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + public static boolean canBeSpecialMethod(TruffleString name, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { int len = codePointLengthNode.execute(name, TS_ENCODING); - return len > 5 && codePointAtIndexNode.execute(name, len - 2, TS_ENCODING) == '_' && codePointAtIndexNode.execute(name, len - 1, TS_ENCODING) == '_' && - codePointAtIndexNode.execute(name, 1, TS_ENCODING) == '_' && codePointAtIndexNode.execute(name, 0, TS_ENCODING) == '_'; + return len > 5 && codePointAtIndexNode.execute(name, len - 2) == '_' && codePointAtIndexNode.execute(name, len - 1) == '_' && + codePointAtIndexNode.execute(name, 1) == '_' && codePointAtIndexNode.execute(name, 0) == '_'; } public static boolean isSpecialMethod(TruffleString name) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericAliasBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericAliasBuiltins.java index 7297e915c3..792357d470 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericAliasBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericAliasBuiltins.java @@ -134,6 +134,7 @@ import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PGenericAlias) public final class GenericAliasBuiltins extends PythonBuiltins { @@ -242,7 +243,7 @@ static Object repr(VirtualFrame frame, PGenericAlias self, @TruffleBoundary static Object reprBoundary(PGenericAlias self) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); if (self.isStarred()) { sb.appendCodePointUncached('*'); } @@ -270,7 +271,7 @@ static Object reprBoundary(PGenericAlias self) { } // Equivalent of ga_repr_item in CPython - private static void reprItem(TruffleStringBuilder sb, Object obj) { + private static void reprItem(TruffleStringBuilderUTF32 sb, Object obj) { if (obj == PEllipsis.INSTANCE) { sb.appendStringUncached(StringLiterals.T_ELLIPSIS); return; @@ -278,7 +279,7 @@ private static void reprItem(TruffleStringBuilder sb, Object obj) { GenericTypeNodes.reprItem(sb, obj); } - private static void reprItemsList(TruffleStringBuilder sb, PList list) { + private static void reprItemsList(TruffleStringBuilderUTF32 sb, PList list) { sb.appendCodePointUncached('['); SequenceStorage storage = list.getSequenceStorage(); for (int i = 0; i < storage.length(); i++) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericTypeNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericTypeNodes.java index ba407872fe..b1fb359a91 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericTypeNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/GenericTypeNodes.java @@ -91,7 +91,7 @@ import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; -import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public abstract class GenericTypeNodes { @@ -106,7 +106,7 @@ private static Object getItemUncached(SequenceStorage storage, int i) { return SequenceStorageNodes.GetItemScalarNode.executeUncached(storage, i); } - static void reprItem(TruffleStringBuilder sb, Object obj) { + static void reprItem(TruffleStringBuilderUTF32 sb, Object obj) { PyObjectLookupAttr lookup = PyObjectLookupAttr.getUncached(); PyObjectStrAsTruffleStringNode str = PyObjectStrAsTruffleStringNode.getUncached(); Object origin = lookup.execute(null, null, obj, T___ORIGIN__); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/UnionTypeBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/UnionTypeBuiltins.java index c38747c653..3e53b1f8a3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/UnionTypeBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/types/UnionTypeBuiltins.java @@ -101,6 +101,7 @@ import com.oracle.truffle.api.profiles.InlinedBranchProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @CoreFunctions(extendClasses = PythonBuiltinClassType.PUnionType) public final class UnionTypeBuiltins extends PythonBuiltins { @@ -162,7 +163,7 @@ Object repr(VirtualFrame frame, PUnionType self, @TruffleBoundary private TruffleString reprBoundary(PUnionType self) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); SequenceStorage argsStorage = self.getArgs().getSequenceStorage(); for (int i = 0; i < argsStorage.length(); i++) { if (i > 0) { @@ -174,7 +175,7 @@ private TruffleString reprBoundary(PUnionType self) { } // Equivalent of union_repr_item in CPython - private static void reprItem(TruffleStringBuilder sb, Object obj) { + private static void reprItem(TruffleStringBuilderUTF32 sb, Object obj) { if (IsSameTypeNode.executeUncached(obj, PythonBuiltinClassType.PNone)) { sb.appendStringUncached(StringLiterals.T_NONE); return; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/Unparser.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/Unparser.java index 5e5a7f69fc..a4300e4889 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/Unparser.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/compiler/Unparser.java @@ -77,6 +77,7 @@ import com.oracle.graal.python.runtime.formatting.InternalFormat.Spec; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public class Unparser implements SSTreeVisitor { public static TruffleString unparse(SSTNode node) { @@ -84,17 +85,17 @@ public static TruffleString unparse(SSTNode node) { } private static TruffleString unparse(SSTNode node, int level) { - TruffleStringBuilder builder = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 builder = TruffleStringBuilder.createUTF32(); node.accept(new Unparser(builder, level)); return builder.toStringUncached(); } - private Unparser(TruffleStringBuilder builder, int level) { + private Unparser(TruffleStringBuilderUTF32 builder, int level) { this.builder = builder; this.level = level; } - private TruffleStringBuilder builder; + private TruffleStringBuilderUTF32 builder; private int level; private static final int PR_TUPLE = 0; @@ -172,8 +173,8 @@ private void appendFStringElement(ExprTy e, boolean isFormatSpec) { } public TruffleString buildFStringBody(ExprTy[] values, boolean isFormatSpec) { - TruffleStringBuilder savedBuilder = builder; - builder = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 savedBuilder = builder; + builder = TruffleStringBuilder.createUTF32(); for (int i = 0; i < values.length; i++) { appendFStringElement(values[i], isFormatSpec); } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyMemoryViewFromObject.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyMemoryViewFromObject.java index d00c19f665..da5db1690a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyMemoryViewFromObject.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyMemoryViewFromObject.java @@ -141,7 +141,7 @@ static PMemoryView fromManaged(VirtualFrame frame, Object object, @Cached CallNode callNode, @Cached MemoryViewNodes.InitFlagsNode initFlagsNode, @Cached TruffleString.CodePointLengthNode lengthNode, - @Cached TruffleString.CodePointAtIndexNode atIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node atIndexNode, @Exclusive @Cached PRaiseNode raiseNode) { Object typeObj = getClassNode.execute(inliningTarget, object); assert typeObj instanceof PythonBuiltinClassType || typeObj instanceof PythonAbstractObject; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectFunctionStr.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectFunctionStr.java index 8852dc79c8..1fed50f55c 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectFunctionStr.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectFunctionStr.java @@ -52,6 +52,7 @@ import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; /** * Obtains a string representation of a function for error reporting. Equivalent of CPython's @@ -79,7 +80,7 @@ public static TruffleString executeUncached(Object function) { if (!(module instanceof PNone)) { TruffleString moduleStr = asStr.execute(null, null, module); if (!T_BUILTINS.equalsUncached(moduleStr, TS_ENCODING)) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); sb.appendStringUncached(moduleStr); sb.appendCodePointUncached('.'); sb.appendStringUncached(qualnameStr); @@ -88,7 +89,7 @@ public static TruffleString executeUncached(Object function) { return sb.toStringUncached(); } } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); sb.appendStringUncached(qualnameStr); sb.appendCodePointUncached('('); sb.appendCodePointUncached(')'); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttr.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttr.java index cde5e3a357..f980d8e8d2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttr.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttr.java @@ -99,7 +99,7 @@ static Object getDynamicAttr(Frame frame, Node inliningTarget, Object receiver, @Cached CallSlotGetAttrNode callGetAttrNode, @Cached AttributeErrorBuiltins.SetAttributeErrorContext setContext, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { Object type = getClass.execute(inliningTarget, receiver); var slots = getSlotsNode.execute(inliningTarget, receiver); if (!codePointLengthNode.isAdoptable()) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttrO.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttrO.java index 2f542cf8f7..41a49c0b36 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttrO.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectGetAttrO.java @@ -76,7 +76,7 @@ static Object getDynamicAttr(Frame frame, Node inliningTarget, Object receiver, @Cached GetCachedTpSlotsNode getSlotsNode, @Cached CallSlotGetAttrONode callGetAttrNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { Object type = getClass.execute(inliningTarget, receiver); var slots = getSlotsNode.execute(inliningTarget, type); if (!codePointLengthNode.isAdoptable() && name instanceof TruffleString tsName) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttr.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttr.java index c967b2738f..6921d55b81 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttr.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttr.java @@ -132,7 +132,7 @@ protected static boolean isBuiltinTypeType(Object type) { return type == PythonBuiltinClassType.PythonClass; } - protected static boolean isTypeSlot(TruffleString name, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + protected static boolean isTypeSlot(TruffleString name, TruffleString.CodePointLengthNode codePointLengthNode, TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { return TpSlots.canBeSpecialMethod(name, codePointLengthNode, codePointAtIndexNode) || name.equalsUncached(T_MRO, TS_ENCODING); } @@ -207,7 +207,7 @@ static Object doBuiltinTypeType(VirtualFrame frame, Node inliningTarget, Object @Exclusive @Cached CallSlotDescrGet callGetSlot, /* GR-44836 @Shared */ @Exclusive @Cached IsBuiltinObjectProfile errorProfile, @Shared @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Shared @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { Object value = readNode.execute(object, name); if (valueFound.profile(inliningTarget, value != PNone.NO_VALUE)) { var valueSlots = getSlotsNode.execute(inliningTarget, value); @@ -270,7 +270,7 @@ static Object getDynamicAttr(Frame frame, Node inliningTarget, Object receiver, @Exclusive @Cached CallSlotGetAttrNode callGetattribute, /* GR-44836 @Shared */ @Exclusive @Cached IsBuiltinObjectProfile errorProfile, @Shared @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Shared @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Shared @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { Object type = getClass.execute(inliningTarget, receiver); TpSlots slots = getSlotsNode.execute(inliningTarget, type); if (!codePointLengthNode.isAdoptable()) { @@ -316,14 +316,14 @@ public static PyObjectLookupAttr getUncached() { * guaranteed to be a {@code java.lang.TruffleString}. */ static Object readAttributeQuickly(Object type, TpSlots slots, Object receiver, TruffleString stringName, TruffleString.CodePointLengthNode codePointLengthNode, - TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { if (slots.tp_getattro() == ObjectBuiltins.SLOTS.tp_getattro() && type instanceof PythonManagedClass) { PythonAbstractClass[] bases = ((PythonManagedClass) type).getBaseClasses(); if (bases.length == 1) { PythonAbstractClass base = bases[0]; if (base instanceof PythonBuiltinClass && ((PythonBuiltinClass) base).getType() == PythonBuiltinClassType.PythonObject) { - if (!(codePointAtIndexNode.execute(stringName, 0, TS_ENCODING) == '_' && codePointAtIndexNode.execute(stringName, 1, TS_ENCODING) == '_')) { + if (!(codePointAtIndexNode.execute(stringName, 0) == '_' && codePointAtIndexNode.execute(stringName, 1) == '_')) { // not a special name, so this attribute cannot be inherited, and can // only be on the type or the object. If it's on the type, return to // the generic code. diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttrO.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttrO.java index e6bcd0c45c..717270fb8e 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttrO.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyObjectLookupAttrO.java @@ -80,7 +80,7 @@ static Object getDynamicAttr(Frame frame, Node inliningTarget, Object receiver, @Cached CallSlotGetAttrONode callGetattribute, @Cached IsBuiltinObjectProfile errorProfile, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode) { + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode) { Object type = getClass.execute(inliningTarget, receiver); TpSlots slots = getSlotsNode.execute(inliningTarget, type); if (!codePointLengthNode.isAdoptable() && name instanceof TruffleString tsName) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyUnicodeReadCharNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyUnicodeReadCharNode.java index 1ef0623d30..6e525873ce 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyUnicodeReadCharNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/lib/PyUnicodeReadCharNode.java @@ -73,7 +73,7 @@ public abstract class PyUnicodeReadCharNode extends PNodeWithContext { static int doGeneric(Node inliningTarget, Object type, long lindex, @Cached CastToTruffleStringNode castToStringNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached PRaiseNode raiseNode) { try { TruffleString s = castToStringNode.execute(inliningTarget, type); @@ -82,7 +82,7 @@ static int doGeneric(Node inliningTarget, Object type, long lindex, if (index < 0 || index >= codePointLengthNode.execute(s, TS_ENCODING)) { throw raiseNode.raise(inliningTarget, IndexError, ErrorMessages.STRING_INDEX_OUT_OF_RANGE); } - return codePointAtIndexNode.execute(s, index, TS_ENCODING); + return codePointAtIndexNode.execute(s, index); } catch (CannotCastException e) { throw raiseNode.raise(inliningTarget, TypeError, ErrorMessages.BAD_ARG_TYPE_FOR_BUILTIN_OP); } catch (OverflowException e) { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/argument/CreateArgumentsNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/argument/CreateArgumentsNode.java index b2943f4f46..e2298e39a3 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/argument/CreateArgumentsNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/argument/CreateArgumentsNode.java @@ -92,6 +92,7 @@ import com.oracle.truffle.api.profiles.InlinedIntValueProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; @GenerateUncached @GenerateInline @@ -630,7 +631,7 @@ protected static PException raiseMissing(Node inliningTarget, Object callable, T @TruffleBoundary private static TruffleString joinArgNames(TruffleString[] missingNames, int missingCnt) { - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(); sb.appendStringUncached(missingNames[0]); if (missingCnt == 2) { sb.appendStringUncached(toTruffleStringUncached("' and '")); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/ImportStarNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/ImportStarNode.java index 09640e94c6..2bb99afb1a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/ImportStarNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/bytecode/ImportStarNode.java @@ -92,7 +92,7 @@ void doImport(VirtualFrame frame, TruffleString moduleName, int level, @Cached InlinedConditionProfile javaImport, @Cached CastToTruffleStringNode castToTruffleStringNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached IsBuiltinObjectProfile isAttributeErrorProfile) { Object importedModule = importModule(frame, moduleName, PArguments.getGlobals(frame), T_IMPORT_ALL, level, importNameNode); Object locals = PArguments.getSpecialArgument(frame); @@ -152,7 +152,7 @@ private static void writeAttribute(VirtualFrame frame, Node inliningTarget, Obje } private void writeAttributeToLocals(VirtualFrame frame, Node inliningTarget, TruffleString moduleName, PythonModule importedModule, Object locals, Object attrName, boolean fromAll, - CastToTruffleStringNode castToTruffleStringNode, TruffleString.CodePointLengthNode cpLenNode, TruffleString.CodePointAtIndexNode cpAtIndexNode, PyObjectGetAttr getAttr, + CastToTruffleStringNode castToTruffleStringNode, TruffleString.CodePointLengthNode cpLenNode, TruffleString.CodePointAtIndexUTF32Node cpAtIndexNode, PyObjectGetAttr getAttr, PyObjectSetItem dictWriteNode, PyObjectSetAttr setAttrNode) { try { TruffleString name = castToTruffleStringNode.execute(inliningTarget, attrName); @@ -170,8 +170,8 @@ private void writeAttributeToLocals(VirtualFrame frame, Node inliningTarget, Tru } } - private static boolean startsWithUnderscore(TruffleString s, TruffleString.CodePointLengthNode cpLenNode, TruffleString.CodePointAtIndexNode cpAtIndexNode) { - return cpLenNode.execute(s, TS_ENCODING) > 0 && cpAtIndexNode.execute(s, 0, TS_ENCODING) == '_'; + private static boolean startsWithUnderscore(TruffleString s, TruffleString.CodePointLengthNode cpLenNode, TruffleString.CodePointAtIndexUTF32Node cpAtIndexNode) { + return cpLenNode.execute(s, TS_ENCODING) > 0 && cpAtIndexNode.execute(s, 0) == '_'; } public static ImportStarNode create() { diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/function/builtins/clinic/CodePointConversionNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/function/builtins/clinic/CodePointConversionNode.java index cba1c5ccdf..1ef6c50358 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/function/builtins/clinic/CodePointConversionNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/function/builtins/clinic/CodePointConversionNode.java @@ -86,12 +86,12 @@ int doOthers(Object value, @Bind Node inliningTarget, @Cached CastToTruffleStringNode castToStringNode, @Cached TruffleString.CodePointLengthNode codePointLengthNode, - @Cached TruffleString.CodePointAtIndexNode codePointAtIndexNode, + @Cached TruffleString.CodePointAtIndexUTF32Node codePointAtIndexNode, @Cached PRaiseNode raiseNode) { try { TruffleString str = castToStringNode.execute(inliningTarget, value); if (codePointLengthNode.execute(str, TS_ENCODING) == 1) { - return codePointAtIndexNode.execute(str, 0, TS_ENCODING); + return codePointAtIndexNode.execute(str, 0); } } catch (CannotCastException ex) { // handled below diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/statement/AbstractImportNode.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/statement/AbstractImportNode.java index 37ae1d77a1..a5e31e4f3a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/statement/AbstractImportNode.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/statement/AbstractImportNode.java @@ -100,6 +100,7 @@ import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; public abstract class AbstractImportNode extends PNodeWithContext { @@ -591,7 +592,7 @@ TruffleString resolveName(VirtualFrame frame, TruffleString name, Object globals return base; } - TruffleStringBuilder sb = TruffleStringBuilder.create(TS_ENCODING, base.byteLength(TS_ENCODING) + tsbCapacity(1) + name.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 sb = TruffleStringBuilder.createUTF32(base.byteLength(TS_ENCODING) + tsbCapacity(1) + name.byteLength(TS_ENCODING)); appendStringNode.execute(sb, base); appendStringNode.execute(sb, T_DOT); appendStringNode.execute(sb, name); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/object/PFactory.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/object/PFactory.java index e12f20ff52..107e1596f4 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/object/PFactory.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/object/PFactory.java @@ -469,7 +469,7 @@ public static PMemoryView createMemoryView(PythonLanguage language, PythonContex } public static PMemoryView createMemoryViewForManagedObject(PythonLanguage language, Object buffer, Object owner, int itemsize, int length, boolean readonly, TruffleString format, - TruffleString.CodePointLengthNode lengthNode, TruffleString.CodePointAtIndexNode atIndexNode) { + TruffleString.CodePointLengthNode lengthNode, TruffleString.CodePointAtIndexUTF32Node atIndexNode) { PythonBuiltinClassType cls = PythonBuiltinClassType.PMemoryView; return new PMemoryView(cls, cls.getInstanceShape(language), null, null, buffer, owner, length, readonly, itemsize, BufferFormat.forMemoryView(format, lengthNode, atIndexNode), format, 1, null, 0, new int[]{length / itemsize}, new int[]{itemsize}, null, diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/BoolSequenceStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/BoolSequenceStorage.java index 85f010cd26..256bd20354 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/BoolSequenceStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/BoolSequenceStorage.java @@ -73,8 +73,8 @@ public void insertBoolItem(int idx, boolean value) { ensureCapacity(length + 1); // shifting tail to the right by one slot - for (int i = values.length - 1; i > idx; i--) { - values[i] = values[i - 1]; + if (idx < length) { + PythonUtils.arraycopy(values, idx, values, idx + 1, length - idx); } values[idx] = value; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ByteSequenceStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ByteSequenceStorage.java index 60aa9ae8ec..1a4f8f9297 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ByteSequenceStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ByteSequenceStorage.java @@ -107,8 +107,8 @@ public void insertByteItem(int idx, byte value) { ensureCapacity(length + 1); // shifting tail to the right by one slot - for (int i = values.length - 1; i > idx; i--) { - values[i] = values[i - 1]; + if (idx < length) { + PythonUtils.arraycopy(values, idx, values, idx + 1, length - idx); } values[idx] = value; @@ -120,13 +120,10 @@ public int indexOfByte(byte value) { } public int indexOfInt(int value) { - for (int i = 0; i < length; i++) { - if ((values[i] & 0xFF) == value) { - return i; - } + if ((value & 0xFF) != value) { + return -1; } - - return -1; + return ArrayUtils.indexOf(values, 0, length, (byte) value); } @Override diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/DoubleSequenceStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/DoubleSequenceStorage.java index b78f0af08f..1a90f0fff4 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/DoubleSequenceStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/DoubleSequenceStorage.java @@ -103,8 +103,8 @@ public void insertDoubleItem(int idx, double value) { ensureCapacity(length + 1); // shifting tail to the right by one slot - for (int i = values.length - 1; i > idx; i--) { - values[i] = values[i - 1]; + if (idx < length) { + PythonUtils.arraycopy(values, idx, values, idx + 1, length - idx); } values[idx] = value; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/IntSequenceStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/IntSequenceStorage.java index 444c76e8a2..f450c1a47f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/IntSequenceStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/IntSequenceStorage.java @@ -103,8 +103,8 @@ public void insertIntItem(int idx, int value) { ensureCapacity(length + 1); // shifting tail to the right by one slot - for (int i = values.length - 1; i > idx; i--) { - values[i] = values[i - 1]; + if (idx < length) { + PythonUtils.arraycopy(values, idx, values, idx + 1, length - idx); } values[idx] = value; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/LongSequenceStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/LongSequenceStorage.java index 16b1c731f6..fbbe102bfc 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/LongSequenceStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/LongSequenceStorage.java @@ -103,8 +103,8 @@ public void insertLongItem(int idx, long value) { ensureCapacity(length + 1); // shifting tail to the right by one slot - for (int i = values.length - 1; i > idx; i--) { - values[i] = values[i - 1]; + if (idx < length) { + PythonUtils.arraycopy(values, idx, values, idx + 1, length - idx); } values[idx] = value; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ObjectSequenceStorage.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ObjectSequenceStorage.java index 47698e9dc3..f8b4d83505 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ObjectSequenceStorage.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/ObjectSequenceStorage.java @@ -101,14 +101,20 @@ public void insertItem(int idx, Object value) { ensureCapacity(length + 1); // shifting tail to the right by one slot - for (int i = values.length - 1; i > idx; i--) { - values[i] = values[i - 1]; + if (idx < length) { + PythonUtils.arraycopy(values, idx, values, idx + 1, length - idx); } values[idx] = assertNoJavaString(value); incLength(); } + public void appendItem(Object value) { + ensureCapacity(length + 1); + values[length] = assertNoJavaString(value); + incLength(); + } + public void increaseCapacityExactWithCopy(int newCapacity) { values = PythonUtils.arrayCopyOf(values, newCapacity); capacity = values.length; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/ArrayBuilder.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/ArrayBuilder.java index 2e185c2796..31578a8685 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/ArrayBuilder.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/ArrayBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -60,6 +60,10 @@ public ArrayBuilder(int capacity) { this.data = new Object[capacity]; } + public boolean isEmpty() { + return size == 0; + } + public void add(Object item) { if (size == data.length) { try { @@ -78,6 +82,18 @@ public T get(int index) { return (T) data[index]; } + @SuppressWarnings("unchecked") + public T pop() { + assert size > 0; + return (T) data[--size]; + } + + @SuppressWarnings("unchecked") + public T peek() { + assert size > 0; + return (T) data[size - 1]; + } + @SuppressWarnings("unchecked") public T[] toArray(T[] newArray) { return (T[]) arrayCopyOf(data, size, newArray.getClass()); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/BufferFormat.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/BufferFormat.java index 9153526533..75ab0a9043 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/BufferFormat.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/BufferFormat.java @@ -87,13 +87,13 @@ public enum BufferFormat { this.baseTypeCode = toTruffleStringUncached(baseTypeCode); } - public static BufferFormat forMemoryView(TruffleString formatString, TruffleString.CodePointLengthNode lengthNode, TruffleString.CodePointAtIndexNode atIndexNode) { + public static BufferFormat forMemoryView(TruffleString formatString, TruffleString.CodePointLengthNode lengthNode, TruffleString.CodePointAtIndexUTF32Node atIndexNode) { char fmtchar; int length = lengthNode.execute(formatString, TS_ENCODING); if (length == 1) { - fmtchar = (char) atIndexNode.execute(formatString, 0, TS_ENCODING); - } else if (length == 2 && atIndexNode.execute(formatString, 0, TS_ENCODING) == '@') { - fmtchar = (char) atIndexNode.execute(formatString, 1, TS_ENCODING); + fmtchar = (char) atIndexNode.execute(formatString, 0); + } else if (length == 2 && atIndexNode.execute(formatString, 0) == '@') { + fmtchar = (char) atIndexNode.execute(formatString, 1); } else { return OTHER; } @@ -112,10 +112,10 @@ public static BufferFormat forMemoryView(TruffleString formatString, TruffleStri return format != null ? format : OTHER; } - public static BufferFormat forArray(TruffleString formatString, TruffleString.CodePointLengthNode lengthNode, TruffleString.CodePointAtIndexNode atIndexNode) { + public static BufferFormat forArray(TruffleString formatString, TruffleString.CodePointLengthNode lengthNode, TruffleString.CodePointAtIndexUTF32Node atIndexNode) { int length = lengthNode.execute(formatString, TS_ENCODING); if (length == 1) { - char fmtchar = (char) atIndexNode.execute(formatString, 0, TS_ENCODING); + char fmtchar = (char) atIndexNode.execute(formatString, 0); if (fmtchar == 'u') { return UNICODE; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java index 8189b5cc19..ce80616b39 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/CharsetMapping.java @@ -69,6 +69,7 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.strings.TruffleStringBuilder; +import com.oracle.truffle.api.strings.TruffleStringBuilderUTF32; import com.oracle.truffle.api.strings.TruffleStringIterator; /** @@ -160,7 +161,7 @@ static TruffleString normalize(TruffleString encoding, @Cached TruffleStringIterator.NextNode nextNode, @Cached TruffleStringBuilder.AppendCodePointNode appendCodePointNode, @Cached TruffleStringBuilder.ToStringNode toStringNode) { - TruffleStringBuilder str = TruffleStringBuilder.create(TS_ENCODING, encoding.byteLength(TS_ENCODING)); + TruffleStringBuilderUTF32 str = TruffleStringBuilder.createUTF32(encoding.byteLength(TS_ENCODING)); boolean lastCharInvalid = false; TruffleStringIterator it = createCodePointIteratorNode.execute(encoding, TS_ENCODING); while (it.hasNext()) { diff --git a/graalpython/lib-python/3/json/decoder.py b/graalpython/lib-python/3/json/decoder.py index 5e5effeac0..4ecd47cd53 100644 --- a/graalpython/lib-python/3/json/decoder.py +++ b/graalpython/lib-python/3/json/decoder.py @@ -12,11 +12,6 @@ FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL -NaN = float('nan') -PosInf = float('inf') -NegInf = float('-inf') - - class JSONDecodeError(ValueError): """Subclass of ValueError with the following additional properties: @@ -43,13 +38,6 @@ def __reduce__(self): return self.__class__, (self.msg, self.doc, self.pos) -_CONSTANTS = { - '-Infinity': NegInf, - 'Infinity': PosInf, - 'NaN': NaN, -} - - HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS) STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) BACKSLASH = { @@ -318,9 +306,12 @@ def __init__(self, *, object_hook=None, parse_float=None, including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. """ self.object_hook = object_hook - self.parse_float = parse_float or float - self.parse_int = parse_int or int - self.parse_constant = parse_constant or _CONSTANTS.__getitem__ + # graalpy change: don't use "float()" as default for parse_float, unnecessary overhead + self.parse_float = parse_float + # graalpy change: don't use "int()" as default for parse_int, unnecessary overhead + self.parse_int = parse_int + # graalpy change: don't use _CONSTANTS.__getitem__ as default for parse_constant, unnecessary overhead + self.parse_constant = parse_constant self.strict = strict self.object_pairs_hook = object_pairs_hook self.parse_object = JSONObject diff --git a/graalpython/lib-python/3/json/scanner.py b/graalpython/lib-python/3/json/scanner.py index 090897515f..588ab2a683 100644 --- a/graalpython/lib-python/3/json/scanner.py +++ b/graalpython/lib-python/3/json/scanner.py @@ -8,6 +8,18 @@ __all__ = ['make_scanner'] +# graalpy change: moved here from decoder.py +NaN = float('nan') +PosInf = float('inf') +NegInf = float('-inf') + +# graalpy change: moved here from decoder.py +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + NUMBER_RE = re.compile( r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?', (re.VERBOSE | re.MULTILINE | re.DOTALL)) @@ -18,9 +30,10 @@ def py_make_scanner(context): parse_string = context.parse_string match_number = NUMBER_RE.match strict = context.strict - parse_float = context.parse_float - parse_int = context.parse_int - parse_constant = context.parse_constant + # graalpy change: allow the parser functions to be None for better performance in JSONScannerBuiltins + parse_float = context.parse_float or float + parse_int = context.parse_int or int + parse_constant = context.parse_constant or _CONSTANTS.__getitem__ object_hook = context.object_hook object_pairs_hook = context.object_pairs_hook memo = context.memo