1 /*
2 * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "compiler/compileLog.hpp"
27 #include "gc/shared/collectedHeap.inline.hpp"
28 #include "libadt/vectset.hpp"
29 #include "memory/universe.hpp"
30 #include "opto/addnode.hpp"
31 #include "opto/arraycopynode.hpp"
32 #include "opto/callnode.hpp"
33 #include "opto/castnode.hpp"
34 #include "opto/cfgnode.hpp"
35 #include "opto/compile.hpp"
36 #include "opto/convertnode.hpp"
37 #include "opto/graphKit.hpp"
38 #include "opto/intrinsicnode.hpp"
39 #include "opto/locknode.hpp"
40 #include "opto/loopnode.hpp"
41 #include "opto/macro.hpp"
42 #include "opto/memnode.hpp"
43 #include "opto/narrowptrnode.hpp"
44 #include "opto/node.hpp"
45 #include "opto/opaquenode.hpp"
46 #include "opto/phaseX.hpp"
47 #include "opto/rootnode.hpp"
48 #include "opto/runtime.hpp"
49 #include "opto/subnode.hpp"
50 #include "opto/subtypenode.hpp"
51 #include "opto/type.hpp"
52 #include "runtime/sharedRuntime.hpp"
53 #include "utilities/macros.hpp"
54 #include "utilities/powerOfTwo.hpp"
55 #if INCLUDE_G1GC
56 #include "gc/g1/g1ThreadLocalData.hpp"
57 #endif // INCLUDE_G1GC
58 #if INCLUDE_SHENANDOAHGC
59 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
60 #endif
61
62
63 //
64 // Replace any references to "oldref" in inputs to "use" with "newref".
65 // Returns the number of replacements made.
66 //
67 int PhaseMacroExpand::replace_input(Node *use, Node *oldref, Node *newref) {
68 int nreplacements = 0;
69 uint req = use->req();
70 for (uint j = 0; j < use->len(); j++) {
71 Node *uin = use->in(j);
72 if (uin == oldref) {
73 if (j < req)
74 use->set_req(j, newref);
75 else
76 use->set_prec(j, newref);
77 nreplacements++;
78 } else if (j >= req && uin == NULL) {
79 break;
80 }
81 }
82 return nreplacements;
83 }
84
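// Move every use of 'old' over to 'target'. Afterwards 'old' must have no remaining uses.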
85 void PhaseMacroExpand::migrate_outs(Node *old, Node *target) {
86 assert(old != NULL, "sanity");
87 for (DUIterator_Fast imax, i = old->fast_outs(imax); i < imax; i++) {
88 Node* use = old->fast_out(i);
89 _igvn.rehash_node_delayed(use);
90 imax -= replace_input(use, old, target);
91 // back up iterator
92 --i;
93 }
94 assert(old->outcnt() == 0, "all uses must be deleted");
95 }
96
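// Copy the debug-info inputs (cloning any SafePointScalarObject nodes) from 'oldcall'
// to 'newcall' and rebase the cloned JVMState offsets by the difference in argument counts.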
97 void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcall) {
98 // Copy debug information and adjust JVMState information
99 uint old_dbg_start = oldcall->tf()->domain()->cnt();
100 uint new_dbg_start = newcall->tf()->domain()->cnt();
101 int jvms_adj = new_dbg_start - old_dbg_start;
102 assert (new_dbg_start == newcall->req(), "argument count mismatch");
103
104 // SafePointScalarObject node could be referenced several times in debug info.
105 // Use Dict to record cloned nodes.
106 Dict* sosn_map = new Dict(cmpkey,hashkey);
107 for (uint i = old_dbg_start; i < oldcall->req(); i++) {
108 Node* old_in = oldcall->in(i);
109 // Clone old SafePointScalarObjectNodes, adjusting their field contents.
110 if (old_in != NULL && old_in->is_SafePointScalarObject()) {
111 SafePointScalarObjectNode* old_sosn = old_in->as_SafePointScalarObject();
112 uint old_unique = C->unique();
113 Node* new_in = old_sosn->clone(sosn_map);
114 if (old_unique != C->unique()) { // New node?
115 new_in->set_req(0, C->root()); // reset control edge
116 new_in = transform_later(new_in); // Register new node.
117 }
118 old_in = new_in;
119 }
120 newcall->add_req(old_in);
121 }
122
123 // JVMS may be shared so clone it before we modify it
124 newcall->set_jvms(oldcall->jvms() != NULL ? oldcall->jvms()->clone_deep(C) : NULL);
125 for (JVMState *jvms = newcall->jvms(); jvms != NULL; jvms = jvms->caller()) {
126 jvms->set_map(newcall);
127 jvms->set_locoff(jvms->locoff()+jvms_adj);
128 jvms->set_stkoff(jvms->stkoff()+jvms_adj);
129 jvms->set_monoff(jvms->monoff()+jvms_adj);
130 jvms->set_scloff(jvms->scloff()+jvms_adj);
131 jvms->set_endoff(jvms->endoff()+jvms_adj);
132 }
133 }
134
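// Emit a test of (word & mask) against 'bits' (or of 'word' itself when mask == 0) and
// branch on inequality; the equal side is the fast path. One arm is wired into 'region'
// at 'edge' and the other arm is returned, as selected by 'return_fast_path'.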
135 Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) {
136 Node* cmp;
137 if (mask != 0) {
138 Node* and_node = transform_later(new AndXNode(word, MakeConX(mask)));
139 cmp = transform_later(new CmpXNode(and_node, MakeConX(bits)));
140 } else {
141 cmp = word;
142 }
143 Node* bol = transform_later(new BoolNode(cmp, BoolTest::ne));
144 IfNode* iff = new IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
145 transform_later(iff);
146
147 // Fast path taken.
148 Node *fast_taken = transform_later(new IfFalseNode(iff));
149
150 // Fast path not-taken, i.e. slow path
151 Node *slow_taken = transform_later(new IfTrueNode(iff));
152
153 if (return_fast_path) {
154 region->init_req(edge, slow_taken); // Capture slow-control
155 return fast_taken;
156 } else {
157 region->init_req(edge, fast_taken); // Capture fast-control
158 return slow_taken;
159 }
160 }
161
162 //--------------------copy_predefined_input_for_runtime_call--------------------
163 void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) {
164 // Set fixed predefined input arguments
165 call->init_req( TypeFunc::Control, ctrl );
166 call->init_req( TypeFunc::I_O , oldcall->in( TypeFunc::I_O) );
167 call->init_req( TypeFunc::Memory , oldcall->in( TypeFunc::Memory ) ); // ?????
168 call->init_req( TypeFunc::ReturnAdr, oldcall->in( TypeFunc::ReturnAdr ) );
169 call->init_req( TypeFunc::FramePtr, oldcall->in( TypeFunc::FramePtr ) );
170 }
171
172 //------------------------------make_slow_call---------------------------------
173 CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type,
174 address slow_call, const char* leaf_name, Node* slow_path,
175 Node* parm0, Node* parm1, Node* parm2) {
176
177 // Slow-path call
178 CallNode *call = leaf_name
179 ? (CallNode*)new CallLeafNode ( slow_call_type, slow_call, leaf_name, TypeRawPtr::BOTTOM )
180 : (CallNode*)new CallStaticJavaNode( slow_call_type, slow_call, OptoRuntime::stub_name(slow_call), oldcall->jvms()->bci(), TypeRawPtr::BOTTOM );
181
182 // Slow path call has no side-effects, uses few values
183 copy_predefined_input_for_runtime_call(slow_path, oldcall, call );
184 if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0);
185 if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1);
186 if (parm2 != NULL) call->init_req(TypeFunc::Parms+2, parm2);
187 copy_call_debug_info(oldcall, call);
188 call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
189 _igvn.replace_node(oldcall, call);
190 transform_later(call);
191
192 return call;
193 }
194
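// Collect the control, I/O, memory and result projections of 'call' (including the
// CatchProj users of the control projection) into the corresponding _*proj fields
// used by the expansion and elimination code below.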
195 void PhaseMacroExpand::extract_call_projections(CallNode *call) {
196 _fallthroughproj = NULL;
197 _fallthroughcatchproj = NULL;
198 _ioproj_fallthrough = NULL;
199 _ioproj_catchall = NULL;
200 _catchallcatchproj = NULL;
201 _memproj_fallthrough = NULL;
202 _memproj_catchall = NULL;
203 _resproj = NULL;
204 for (DUIterator_Fast imax, i = call->fast_outs(imax); i < imax; i++) {
205 ProjNode *pn = call->fast_out(i)->as_Proj();
206 switch (pn->_con) {
207 case TypeFunc::Control:
208 {
209 // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj
210 _fallthroughproj = pn;
211 DUIterator_Fast jmax, j = pn->fast_outs(jmax);
212 const Node *cn = pn->fast_out(j);
213 if (cn->is_Catch()) {
214 ProjNode *cpn = NULL;
215 for (DUIterator_Fast kmax, k = cn->fast_outs(kmax); k < kmax; k++) {
216 cpn = cn->fast_out(k)->as_Proj();
217 assert(cpn->is_CatchProj(), "must be a CatchProjNode");
218 if (cpn->_con == CatchProjNode::fall_through_index)
219 _fallthroughcatchproj = cpn;
220 else {
221 assert(cpn->_con == CatchProjNode::catch_all_index, "must be correct index.");
222 _catchallcatchproj = cpn;
223 }
224 }
225 }
226 break;
227 }
228 case TypeFunc::I_O:
229 if (pn->_is_io_use)
230 _ioproj_catchall = pn;
231 else
232 _ioproj_fallthrough = pn;
233 break;
234 case TypeFunc::Memory:
235 if (pn->_is_io_use)
236 _memproj_catchall = pn;
237 else
238 _memproj_fallthrough = pn;
239 break;
240 case TypeFunc::Parms:
241 _resproj = pn;
242 break;
243 default:
244 assert(false, "unexpected projection from allocation node.");
245 }
246 }
247
248 }
249
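// Let the active BarrierSetC2 eliminate the GC barrier rooted at the CastP2X node 'p2x'.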
250 void PhaseMacroExpand::eliminate_gc_barrier(Node* p2x) {
251 BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2();
252 bs->eliminate_gc_barrier(this, p2x);
253 }
254
255 // Search for a memory operation for the specified memory slice.
256 static Node *scan_mem_chain(Node *mem, int alias_idx, int offset, Node *start_mem, Node *alloc, PhaseGVN *phase) {
257 Node *orig_mem = mem;
258 Node *alloc_mem = alloc->in(TypeFunc::Memory);
259 const TypeOopPtr *tinst = phase->C->get_adr_type(alias_idx)->isa_oopptr();
260 while (true) {
261 if (mem == alloc_mem || mem == start_mem ) {
262 return mem; // hit one of our sentinels
263 } else if (mem->is_MergeMem()) {
264 mem = mem->as_MergeMem()->memory_at(alias_idx);
265 } else if (mem->is_Proj() && mem->as_Proj()->_con == TypeFunc::Memory) {
266 Node *in = mem->in(0);
267 // we can safely skip over safepoints, calls, locks and membars because we
268 // already know that the object is safe to eliminate.
269 if (in->is_Initialize() && in->as_Initialize()->allocation() == alloc) {
270 return in;
271 } else if (in->is_Call()) {
272 CallNode *call = in->as_Call();
273 if (call->may_modify(tinst, phase)) {
274 assert(call->is_ArrayCopy(), "ArrayCopy is the only call node that doesn't make allocation escape");
275 if (call->as_ArrayCopy()->modifies(offset, offset, phase, false)) {
276 return in;
277 }
278 }
279 mem = in->in(TypeFunc::Memory);
280 } else if (in->is_MemBar()) {
281 ArrayCopyNode* ac = NULL;
282 if (ArrayCopyNode::may_modify(tinst, in->as_MemBar(), phase, ac)) {
283 assert(ac != NULL && ac->is_clonebasic(), "Only basic clone is a non escaping clone");
284 return ac;
285 }
286 mem = in->in(TypeFunc::Memory);
287 } else {
288 assert(false, "unexpected projection");
289 }
290 } else if (mem->is_Store()) {
291 const TypePtr* atype = mem->as_Store()->adr_type();
292 int adr_idx = phase->C->get_alias_index(atype);
293 if (adr_idx == alias_idx) {
294 assert(atype->isa_oopptr(), "address type must be oopptr");
295 int adr_offset = atype->offset();
296 uint adr_iid = atype->is_oopptr()->instance_id();
297 // Array element references have the same alias_idx
298 // but different offsets and different instance_ids.
299 if (adr_offset == offset && adr_iid == alloc->_idx)
300 return mem;
301 } else {
302 assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw");
303 }
304 mem = mem->in(MemNode::Memory);
305 } else if (mem->is_ClearArray()) {
306 intptr_t offset;
307 AllocateNode* alloc = AllocateNode::Ideal_allocation(mem->in(3), phase, offset);
308
309 if (alloc == NULL) {
310 return start_mem;
311 }
312
313 if (!ClearArrayNode::step_through(&mem, alloc->_idx, phase)) {
314 // Cannot bypass initialization of the instance
315 // we are looking for.
316 debug_only(intptr_t offset;)
317 assert(alloc == AllocateNode::Ideal_allocation(mem->in(3), phase, offset), "sanity");
318 InitializeNode* init = alloc->as_Allocate()->initialization();
319 // We are looking for stored value, return Initialize node
320 // or memory edge from Allocate node.
321 if (init != NULL)
322 return init;
323 else
324 return alloc->in(TypeFunc::Memory); // It will produce zero value (see callers).
325 }
326 // Otherwise skip it (the call updated 'mem' value).
327 } else if (mem->Opcode() == Op_SCMemProj) {
328 mem = mem->in(0);
329 Node* adr = NULL;
330 if (mem->is_LoadStore()) {
331 adr = mem->in(MemNode::Address);
332 } else {
333 assert(mem->Opcode() == Op_EncodeISOArray ||
334 mem->Opcode() == Op_StrCompressedCopy, "sanity");
335 adr = mem->in(3); // Destination array
336 }
337 const TypePtr* atype = adr->bottom_type()->is_ptr();
338 int adr_idx = phase->C->get_alias_index(atype);
339 if (adr_idx == alias_idx) {
340 DEBUG_ONLY(mem->dump();)
341 assert(false, "Object is not scalar replaceable if a LoadStore node accesses its field");
342 return NULL;
343 }
344 mem = mem->in(MemNode::Memory);
345 } else if (mem->Opcode() == Op_StrInflatedCopy) {
346 Node* adr = mem->in(3); // Destination array
347 const TypePtr* atype = adr->bottom_type()->is_ptr();
348 int adr_idx = phase->C->get_alias_index(atype);
349 if (adr_idx == alias_idx) {
350 DEBUG_ONLY(mem->dump();)
351 assert(false, "Object is not scalar replaceable if a StrInflatedCopy node accesses its field");
352 return NULL;
353 }
354 mem = mem->in(MemNode::Memory);
355 } else {
356 return mem;
357 }
358 assert(mem != orig_mem, "dead memory loop");
359 }
360 }
361
362 // Generate loads from the source of the arraycopy for fields of the
363 // destination that are needed at a deoptimization point.
364 Node* PhaseMacroExpand::make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, Node* mem, BasicType ft, const Type *ftype, AllocateNode *alloc) {
365 BasicType bt = ft;
366 const Type *type = ftype;
367 if (ft == T_NARROWOOP) {
368 bt = T_OBJECT;
369 type = ftype->make_oopptr();
370 }
371 Node* res = NULL;
372 if (ac->is_clonebasic()) {
373 assert(ac->in(ArrayCopyNode::Src) != ac->in(ArrayCopyNode::Dest), "clone source equals destination");
374 Node* base = ac->in(ArrayCopyNode::Src);
375 Node* adr = _igvn.transform(new AddPNode(base, base, MakeConX(offset)));
376 const TypePtr* adr_type = _igvn.type(base)->is_ptr()->add_offset(offset);
377 res = LoadNode::make(_igvn, ctl, mem, adr, adr_type, type, bt, MemNode::unordered, LoadNode::UnknownControl);
378 } else {
379 if (ac->modifies(offset, offset, &_igvn, true)) {
380 assert(ac->in(ArrayCopyNode::Dest) == alloc->result_cast(), "arraycopy destination should be allocation's result");
381 uint shift = exact_log2(type2aelembytes(bt));
382 Node* src_pos = ac->in(ArrayCopyNode::SrcPos);
383 Node* dest_pos = ac->in(ArrayCopyNode::DestPos);
384 const TypeInt* src_pos_t = _igvn.type(src_pos)->is_int();
385 const TypeInt* dest_pos_t = _igvn.type(dest_pos)->is_int();
386
387 Node* adr = NULL;
388 const TypePtr* adr_type = NULL;
389 if (src_pos_t->is_con() && dest_pos_t->is_con()) {
390 intptr_t off = ((src_pos_t->get_con() - dest_pos_t->get_con()) << shift) + offset;
391 Node* base = ac->in(ArrayCopyNode::Src);
392 adr = _igvn.transform(new AddPNode(base, base, MakeConX(off)));
393 adr_type = _igvn.type(base)->is_ptr()->add_offset(off);
394 if (ac->in(ArrayCopyNode::Src) == ac->in(ArrayCopyNode::Dest)) {
395 // Don't emit a new load from src if src == dst but try to get the value from memory instead
396 return value_from_mem(ac->in(TypeFunc::Memory), ctl, ft, ftype, adr_type->isa_oopptr(), alloc);
397 }
398 } else {
399 Node* diff = _igvn.transform(new SubINode(ac->in(ArrayCopyNode::SrcPos), ac->in(ArrayCopyNode::DestPos)));
400 #ifdef _LP64
401 diff = _igvn.transform(new ConvI2LNode(diff));
402 #endif
403 diff = _igvn.transform(new LShiftXNode(diff, intcon(shift)));
404
405 Node* off = _igvn.transform(new AddXNode(MakeConX(offset), diff));
406 Node* base = ac->in(ArrayCopyNode::Src);
407 adr = _igvn.transform(new AddPNode(base, base, off));
408 adr_type = _igvn.type(base)->is_ptr()->add_offset(Type::OffsetBot);
409 if (ac->in(ArrayCopyNode::Src) == ac->in(ArrayCopyNode::Dest)) {
410 // Non constant offset in the array: we can't statically
411 // determine the value
412 return NULL;
413 }
414 }
415 res = LoadNode::make(_igvn, ctl, mem, adr, adr_type, type, bt, MemNode::unordered, LoadNode::UnknownControl);
416 }
417 }
418 if (res != NULL) {
419 res = _igvn.transform(res);
420 if (ftype->isa_narrowoop()) {
421 // PhaseMacroExpand::scalar_replacement adds DecodeN nodes
422 res = _igvn.transform(new EncodePNode(res, ftype));
423 }
424 return res;
425 }
426 return NULL;
427 }
428
429 //
430 // Given a Memory Phi, compute a value Phi containing the values from stores
431 // on the input paths.
432 // Note: this function is recursive; its depth is limited by the "level" argument.
433 // Returns the computed Phi, or NULL if it cannot compute it.
434 Node *PhaseMacroExpand::value_from_mem_phi(Node *mem, BasicType ft, const Type *phi_type, const TypeOopPtr *adr_t, AllocateNode *alloc, Node_Stack *value_phis, int level) {
435 assert(mem->is_Phi(), "sanity");
436 int alias_idx = C->get_alias_index(adr_t);
437 int offset = adr_t->offset();
438 int instance_id = adr_t->instance_id();
439
440 // Check if an appropriate value phi already exists.
441 Node* region = mem->in(0);
442 for (DUIterator_Fast kmax, k = region->fast_outs(kmax); k < kmax; k++) {
443 Node* phi = region->fast_out(k);
444 if (phi->is_Phi() && phi != mem &&
445 phi->as_Phi()->is_same_inst_field(phi_type, (int)mem->_idx, instance_id, alias_idx, offset)) {
446 return phi;
447 }
448 }
449 // Check if an appropriate new value phi already exists.
450 Node* new_phi = value_phis->find(mem->_idx);
451 if (new_phi != NULL)
452 return new_phi;
453
454 if (level <= 0) {
455 return NULL; // Give up: phi tree too deep
456 }
457 Node *start_mem = C->start()->proj_out_or_null(TypeFunc::Memory);
458 Node *alloc_mem = alloc->in(TypeFunc::Memory);
459
460 uint length = mem->req();
461 GrowableArray <Node *> values(length, length, NULL);
462
463 // create a new Phi for the value
464 PhiNode *phi = new PhiNode(mem->in(0), phi_type, NULL, mem->_idx, instance_id, alias_idx, offset);
465 transform_later(phi);
466 value_phis->push(phi, mem->_idx);
467
468 for (uint j = 1; j < length; j++) {
469 Node *in = mem->in(j);
470 if (in == NULL || in->is_top()) {
471 values.at_put(j, in);
472 } else {
473 Node *val = scan_mem_chain(in, alias_idx, offset, start_mem, alloc, &_igvn);
474 if (val == start_mem || val == alloc_mem) {
475 // hit a sentinel, return appropriate 0 value
476 values.at_put(j, _igvn.zerocon(ft));
477 continue;
478 }
479 if (val->is_Initialize()) {
480 val = val->as_Initialize()->find_captured_store(offset, type2aelembytes(ft), &_igvn);
481 }
482 if (val == NULL) {
483 return NULL; // can't find a value on this path
484 }
485 if (val == mem) {
486 values.at_put(j, mem);
487 } else if (val->is_Store()) {
488 Node* n = val->in(MemNode::ValueIn);
489 BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
490 n = bs->step_over_gc_barrier(n);
491 values.at_put(j, n);
492 } else if(val->is_Proj() && val->in(0) == alloc) {
493 values.at_put(j, _igvn.zerocon(ft));
494 } else if (val->is_Phi()) {
495 val = value_from_mem_phi(val, ft, phi_type, adr_t, alloc, value_phis, level-1);
496 if (val == NULL) {
497 return NULL;
498 }
499 values.at_put(j, val);
500 } else if (val->Opcode() == Op_SCMemProj) {
501 assert(val->in(0)->is_LoadStore() ||
502 val->in(0)->Opcode() == Op_EncodeISOArray ||
503 val->in(0)->Opcode() == Op_StrCompressedCopy, "sanity");
504 assert(false, "Object is not scalar replaceable if a LoadStore node accesses its field");
505 return NULL;
506 } else if (val->is_ArrayCopy()) {
507 Node* res = make_arraycopy_load(val->as_ArrayCopy(), offset, val->in(0), val->in(TypeFunc::Memory), ft, phi_type, alloc);
508 if (res == NULL) {
509 return NULL;
510 }
511 values.at_put(j, res);
512 } else {
513 #ifdef ASSERT
514 val->dump();
515 assert(false, "unknown node on this path");
516 #endif
517 return NULL; // unknown node on this path
518 }
519 }
520 }
521 // Set Phi's inputs
522 for (uint j = 1; j < length; j++) {
523 if (values.at(j) == mem) {
524 phi->init_req(j, phi);
525 } else {
526 phi->init_req(j, values.at(j));
527 }
528 }
529 return phi;
530 }
531
532 // Search for the last value stored into the object's field.
533 Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, Node *sfpt_ctl, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc) {
534 assert(adr_t->is_known_instance_field(), "instance required");
535 int instance_id = adr_t->instance_id();
536 assert((uint)instance_id == alloc->_idx, "wrong allocation");
537
538 int alias_idx = C->get_alias_index(adr_t);
539 int offset = adr_t->offset();
540 Node *start_mem = C->start()->proj_out_or_null(TypeFunc::Memory);
541 Node *alloc_ctrl = alloc->in(TypeFunc::Control);
542 Node *alloc_mem = alloc->in(TypeFunc::Memory);
543 Arena *a = Thread::current()->resource_area();
544 VectorSet visited(a);
545
546 bool done = sfpt_mem == alloc_mem;
547 Node *mem = sfpt_mem;
548 while (!done) {
549 if (visited.test_set(mem->_idx)) {
550 return NULL; // found a loop, give up
551 }
552 mem = scan_mem_chain(mem, alias_idx, offset, start_mem, alloc, &_igvn);
553 if (mem == start_mem || mem == alloc_mem) {
554 done = true; // hit a sentinel, return appropriate 0 value
555 } else if (mem->is_Initialize()) {
556 mem = mem->as_Initialize()->find_captured_store(offset, type2aelembytes(ft), &_igvn);
557 if (mem == NULL) {
558 done = true; // Something went wrong.
559 } else if (mem->is_Store()) {
560 const TypePtr* atype = mem->as_Store()->adr_type();
561 assert(C->get_alias_index(atype) == Compile::AliasIdxRaw, "store is correct memory slice");
562 done = true;
563 }
564 } else if (mem->is_Store()) {
565 const TypeOopPtr* atype = mem->as_Store()->adr_type()->isa_oopptr();
566 assert(atype != NULL, "address type must be oopptr");
567 assert(C->get_alias_index(atype) == alias_idx &&
568 atype->is_known_instance_field() && atype->offset() == offset &&
569 atype->instance_id() == instance_id, "store is correct memory slice");
570 done = true;
571 } else if (mem->is_Phi()) {
572 // try to find a phi's unique input
573 Node *unique_input = NULL;
574 Node *top = C->top();
575 for (uint i = 1; i < mem->req(); i++) {
576 Node *n = scan_mem_chain(mem->in(i), alias_idx, offset, start_mem, alloc, &_igvn);
577 if (n == NULL || n == top || n == mem) {
578 continue;
579 } else if (unique_input == NULL) {
580 unique_input = n;
581 } else if (unique_input != n) {
582 unique_input = top;
583 break;
584 }
585 }
586 if (unique_input != NULL && unique_input != top) {
587 mem = unique_input;
588 } else {
589 done = true;
590 }
591 } else if (mem->is_ArrayCopy()) {
592 done = true;
593 } else {
594 assert(false, "unexpected node");
595 }
596 }
597 if (mem != NULL) {
598 if (mem == start_mem || mem == alloc_mem) {
599 // hit a sentinel, return appropriate 0 value
600 return _igvn.zerocon(ft);
601 } else if (mem->is_Store()) {
602 Node* n = mem->in(MemNode::ValueIn);
603 BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
604 n = bs->step_over_gc_barrier(n);
605 return n;
606 } else if (mem->is_Phi()) {
607 // attempt to produce a Phi reflecting the values on the input paths of the Phi
608 Node_Stack value_phis(a, 8);
609 Node * phi = value_from_mem_phi(mem, ft, ftype, adr_t, alloc, &value_phis, ValueSearchLimit);
610 if (phi != NULL) {
611 return phi;
612 } else {
613 // Kill all new Phis
614 while(value_phis.is_nonempty()) {
615 Node* n = value_phis.node();
616 _igvn.replace_node(n, C->top());
617 value_phis.pop();
618 }
619 }
620 } else if (mem->is_ArrayCopy()) {
621 Node* ctl = mem->in(0);
622 Node* m = mem->in(TypeFunc::Memory);
623 if (sfpt_ctl->is_Proj() && sfpt_ctl->as_Proj()->is_uncommon_trap_proj(Deoptimization::Reason_none)) {
624 // pin the loads in the uncommon trap path
625 ctl = sfpt_ctl;
626 m = sfpt_mem;
627 }
628 return make_arraycopy_load(mem->as_ArrayCopy(), offset, ctl, m, ft, ftype, alloc);
629 }
630 }
631 // Something went wrong.
632 return NULL;
633 }
634
635 // Check the possibility of scalar replacement.
636 bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints) {
637 // Scan the uses of the allocation to check for anything that would
638 // prevent us from eliminating it.
639 NOT_PRODUCT( const char* fail_eliminate = NULL; )
640 DEBUG_ONLY( Node* disq_node = NULL; )
641 bool can_eliminate = true;
642
643 Node* res = alloc->result_cast();
644 const TypeOopPtr* res_type = NULL;
645 if (res == NULL) {
646 // All users were eliminated.
647 } else if (!res->is_CheckCastPP()) {
648 NOT_PRODUCT(fail_eliminate = "Allocation does not have unique CheckCastPP";)
649 can_eliminate = false;
650 } else {
651 res_type = _igvn.type(res)->isa_oopptr();
652 if (res_type == NULL) {
653 NOT_PRODUCT(fail_eliminate = "Neither instance nor array allocation";)
654 can_eliminate = false;
655 } else if (res_type->isa_aryptr()) {
656 int length = alloc->in(AllocateNode::ALength)->find_int_con(-1);
657 if (length < 0) {
658 NOT_PRODUCT(fail_eliminate = "Array's size is not constant";)
659 can_eliminate = false;
660 }
661 }
662 }
663
664 if (can_eliminate && res != NULL) {
665 for (DUIterator_Fast jmax, j = res->fast_outs(jmax);
666 j < jmax && can_eliminate; j++) {
667 Node* use = res->fast_out(j);
668
669 if (use->is_AddP()) {
670 const TypePtr* addp_type = _igvn.type(use)->is_ptr();
671 int offset = addp_type->offset();
672
673 if (offset == Type::OffsetTop || offset == Type::OffsetBot) {
674 NOT_PRODUCT(fail_eliminate = "Undefined field reference";)
675 can_eliminate = false;
676 break;
677 }
678 for (DUIterator_Fast kmax, k = use->fast_outs(kmax);
679 k < kmax && can_eliminate; k++) {
680 Node* n = use->fast_out(k);
681 if (!n->is_Store() && n->Opcode() != Op_CastP2X
682 SHENANDOAHGC_ONLY(&& (!UseShenandoahGC || !ShenandoahBarrierSetC2::is_shenandoah_wb_pre_call(n))) ) {
683 DEBUG_ONLY(disq_node = n;)
684 if (n->is_Load() || n->is_LoadStore()) {
685 NOT_PRODUCT(fail_eliminate = "Field load";)
686 } else {
687 NOT_PRODUCT(fail_eliminate = "Not store field reference";)
688 }
689 can_eliminate = false;
690 }
691 }
692 } else if (use->is_ArrayCopy() &&
693 (use->as_ArrayCopy()->is_clonebasic() ||
694 use->as_ArrayCopy()->is_arraycopy_validated() ||
695 use->as_ArrayCopy()->is_copyof_validated() ||
696 use->as_ArrayCopy()->is_copyofrange_validated()) &&
697 use->in(ArrayCopyNode::Dest) == res) {
698 // ok to eliminate
699 } else if (use->is_SafePoint()) {
700 SafePointNode* sfpt = use->as_SafePoint();
701 if (sfpt->is_Call() && sfpt->as_Call()->has_non_debug_use(res)) {
702 // Object is passed as argument.
703 DEBUG_ONLY(disq_node = use;)
704 NOT_PRODUCT(fail_eliminate = "Object is passed as argument";)
705 can_eliminate = false;
706 }
707 Node* sfptMem = sfpt->memory();
708 if (sfptMem == NULL || sfptMem->is_top()) {
709 DEBUG_ONLY(disq_node = use;)
710 NOT_PRODUCT(fail_eliminate = "NULL or TOP memory";)
711 can_eliminate = false;
712 } else {
713 safepoints.append_if_missing(sfpt);
714 }
715 } else if (use->Opcode() != Op_CastP2X) { // CastP2X is used by card mark
716 if (use->is_Phi()) {
717 if (use->outcnt() == 1 && use->unique_out()->Opcode() == Op_Return) {
718 NOT_PRODUCT(fail_eliminate = "Object is return value";)
719 } else {
720 NOT_PRODUCT(fail_eliminate = "Object is referenced by Phi";)
721 }
722 DEBUG_ONLY(disq_node = use;)
723 } else {
724 if (use->Opcode() == Op_Return) {
725 NOT_PRODUCT(fail_eliminate = "Object is return value";)
726 } else {
727 NOT_PRODUCT(fail_eliminate = "Object is referenced by node";)
728 }
729 DEBUG_ONLY(disq_node = use;)
730 }
731 can_eliminate = false;
732 }
733 }
734 }
735
736 #ifndef PRODUCT
737 if (print_eliminate_allocations()) {
738 if (can_eliminate) {
739 tty->print("Scalar ");
740 if (res == NULL)
741 alloc->dump();
742 else
743 res->dump();
744 } else if (alloc->_is_scalar_replaceable) {
745 tty->print("NotScalar (%s)", fail_eliminate);
746 if (res == NULL)
747 alloc->dump();
748 else
749 res->dump();
750 #ifdef ASSERT
751 if (disq_node != NULL) {
752 tty->print(" >>>> ");
753 disq_node->dump();
754 }
755 #endif /*ASSERT*/
756 }
757 }
758 #endif
759 return can_eliminate;
760 }
761
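// After scalar-replacement inputs have been appended to 'sfpt', update its JVMState end
// offset and replace any debug-info references to 'res' with the scalar object 'sobj'.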
762 void PhaseMacroExpand::adjust_safepoint_jvms(SafePointNode* sfpt, Node* res, SafePointScalarObjectNode* sobj) {
763 JVMState *jvms = sfpt->jvms();
764 jvms->set_endoff(sfpt->req());
765
766 // Now make a pass over the debug information replacing any references
767 // to the allocated object with "sobj"
768 int start = jvms->debug_start();
769 int end = jvms->debug_end();
770 sfpt->replace_edges_in_range(res, sobj, start, end);
771 _igvn._worklist.push(sfpt);
772 }
773
774 // Do scalar replacement.
775 bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints) {
776 GrowableArray <SafePointNode *> safepoints_done;
777
778 ciKlass* klass = NULL;
779 ciInstanceKlass* iklass = NULL;
780 int nfields = 0;
781 int array_base = 0;
782 int element_size = 0;
783 BasicType basic_elem_type = T_ILLEGAL;
784 ciType* elem_type = NULL;
785
786 Node* res = alloc->result_cast();
787 assert(res == NULL || res->is_CheckCastPP(), "unexpected AllocateNode result");
788 const TypeOopPtr* res_type = NULL;
789 if (res != NULL) { // Could be NULL when there are no users
790 res_type = _igvn.type(res)->isa_oopptr();
791 }
792
793 if (res != NULL) {
794 klass = res_type->klass();
795 if (res_type->isa_instptr()) {
796 // find the fields of the class which will be needed for safepoint debug information
797 assert(klass->is_instance_klass(), "must be an instance klass.");
798 iklass = klass->as_instance_klass();
799 nfields = iklass->nof_nonstatic_fields();
800 } else {
801 // find the array's elements which will be needed for safepoint debug information
802 nfields = alloc->in(AllocateNode::ALength)->find_int_con(-1);
803 assert(klass->is_array_klass() && nfields >= 0, "must be an array klass.");
804 elem_type = klass->as_array_klass()->element_type();
805 basic_elem_type = elem_type->basic_type();
806 array_base = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
807 element_size = type2aelembytes(basic_elem_type);
808 }
809 }
810 //
811 // Process the safepoint uses
812 //
813 while (safepoints.length() > 0) {
814 SafePointNode* sfpt = safepoints.pop();
815 Node* mem = sfpt->memory();
816 Node* ctl = sfpt->control();
817 assert(sfpt->jvms() != NULL, "missed JVMS");
818 // Fields of scalar objs are referenced only at the end
819 // of regular debuginfo at the last (youngest) JVMS.
820 // Record relative start index.
821 uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
822 SafePointScalarObjectNode* sobj = new SafePointScalarObjectNode(res_type,
823 #ifdef ASSERT
824 alloc,
825 #endif
826 first_ind, nfields);
827 sobj->init_req(0, C->root());
828 transform_later(sobj);
829
830 // Scan object's fields adding an input to the safepoint for each field.
831 for (int j = 0; j < nfields; j++) {
832 intptr_t offset;
833 ciField* field = NULL;
834 if (iklass != NULL) {
835 field = iklass->nonstatic_field_at(j);
836 offset = field->offset();
837 elem_type = field->type();
838 basic_elem_type = field->layout_type();
839 } else {
840 offset = array_base + j * (intptr_t)element_size;
841 }
842
843 const Type *field_type;
844 // The next code is taken from Parse::do_get_xxx().
845 if (is_reference_type(basic_elem_type)) {
846 if (!elem_type->is_loaded()) {
847 field_type = TypeInstPtr::BOTTOM;
848 } else if (field != NULL && field->is_static_constant()) {
849 // This can happen if the constant oop is non-perm.
850 ciObject* con = field->constant_value().as_object();
851 // Do not "join" in the previous type; it doesn't add value,
852 // and may yield a vacuous result if the field is of interface type.
853 field_type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
854 assert(field_type != NULL, "field singleton type must be consistent");
855 } else {
856 field_type = TypeOopPtr::make_from_klass(elem_type->as_klass());
857 }
858 if (UseCompressedOops) {
859 field_type = field_type->make_narrowoop();
860 basic_elem_type = T_NARROWOOP;
861 }
862 } else {
863 field_type = Type::get_const_basic_type(basic_elem_type);
864 }
865
866 const TypeOopPtr *field_addr_type = res_type->add_offset(offset)->isa_oopptr();
867
868 Node *field_val = value_from_mem(mem, ctl, basic_elem_type, field_type, field_addr_type, alloc);
869 if (field_val == NULL) {
870 // We weren't able to find a value for this field,
871 // give up on eliminating this allocation.
872
873 // Remove any extra entries we added to the safepoint.
874 uint last = sfpt->req() - 1;
875 for (int k = 0; k < j; k++) {
876 sfpt->del_req(last--);
877 }
878 _igvn._worklist.push(sfpt);
879 // rollback processed safepoints
880 while (safepoints_done.length() > 0) {
881 SafePointNode* sfpt_done = safepoints_done.pop();
882 // remove any extra entries we added to the safepoint
883 last = sfpt_done->req() - 1;
884 for (int k = 0; k < nfields; k++) {
885 sfpt_done->del_req(last--);
886 }
887 JVMState *jvms = sfpt_done->jvms();
888 jvms->set_endoff(sfpt_done->req());
889 // Now make a pass over the debug information replacing any references
890 // to SafePointScalarObjectNode with the allocated object.
891 int start = jvms->debug_start();
892 int end = jvms->debug_end();
893 for (int i = start; i < end; i++) {
894 if (sfpt_done->in(i)->is_SafePointScalarObject()) {
895 SafePointScalarObjectNode* scobj = sfpt_done->in(i)->as_SafePointScalarObject();
896 if (scobj->first_index(jvms) == sfpt_done->req() &&
897 scobj->n_fields() == (uint)nfields) {
898 assert(scobj->alloc() == alloc, "sanity");
899 sfpt_done->set_req(i, res);
900 }
901 }
902 }
903 _igvn._worklist.push(sfpt_done);
904 }
905 #ifndef PRODUCT
906 if (print_eliminate_allocations()) {
907 if (field != NULL) {
908 tty->print("=== At SafePoint node %d can't find value of Field: ",
909 sfpt->_idx);
910 field->print();
911 int field_idx = C->get_alias_index(field_addr_type);
912 tty->print(" (alias_idx=%d)", field_idx);
913 } else { // Array's element
914 tty->print("=== At SafePoint node %d can't find value of array element [%d]",
915 sfpt->_idx, j);
916 }
917 tty->print(", which prevents elimination of: ");
918 if (res == NULL)
919 alloc->dump();
920 else
921 res->dump();
922 }
923 #endif
924 return false;
925 }
926 if (UseCompressedOops && field_type->isa_narrowoop()) {
927 // Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation
928 // to be able to scalar replace the allocation.
929 if (field_val->is_EncodeP()) {
930 field_val = field_val->in(1);
931 } else {
932 field_val = transform_later(new DecodeNNode(field_val, field_val->get_ptr_type()));
933 }
934 }
935 sfpt->add_req(field_val);
936 }
937 adjust_safepoint_jvms(sfpt, res, sobj);
938 safepoints_done.append_if_missing(sfpt); // keep it for rollback
939 }
940 return true;
941 }
942
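// Replace the control and memory projections of 'n' (if any) with the node's own
// control and memory inputs, disconnecting 'n' from the control and memory graphs.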
943 static void disconnect_projections(MultiNode* n, PhaseIterGVN& igvn) {
944 Node* ctl_proj = n->proj_out_or_null(TypeFunc::Control);
945 Node* mem_proj = n->proj_out_or_null(TypeFunc::Memory);
946 if (ctl_proj != NULL) {
947 igvn.replace_node(ctl_proj, n->in(0));
948 }
949 if (mem_proj != NULL) {
950 igvn.replace_node(mem_proj, n->in(TypeFunc::Memory));
951 }
952 }
953
954 // Process users of eliminated allocation.
955 void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) {
956 Node* res = alloc->result_cast();
957 if (res != NULL) {
958 for (DUIterator_Last jmin, j = res->last_outs(jmin); j >= jmin; ) {
959 Node *use = res->last_out(j);
960 uint oc1 = res->outcnt();
961
962 if (use->is_AddP()) {
963 for (DUIterator_Last kmin, k = use->last_outs(kmin); k >= kmin; ) {
964 Node *n = use->last_out(k);
965 uint oc2 = use->outcnt();
966 if (n->is_Store()) {
967 #ifdef ASSERT
968 // Verify that there are no dependent MemBarVolatile nodes,
969 // they should be removed during IGVN, see MemBarNode::Ideal().
970 for (DUIterator_Fast pmax, p = n->fast_outs(pmax);
971 p < pmax; p++) {
972 Node* mb = n->fast_out(p);
973 assert(mb->is_Initialize() || !mb->is_MemBar() ||
974 mb->req() <= MemBarNode::Precedent ||
975 mb->in(MemBarNode::Precedent) != n,
976 "MemBarVolatile should be eliminated for non-escaping object");
977 }
978 #endif
979 _igvn.replace_node(n, n->in(MemNode::Memory));
980 } else {
981 eliminate_gc_barrier(n);
982 }
983 k -= (oc2 - use->outcnt());
984 }
985 _igvn.remove_dead_node(use);
986 } else if (use->is_ArrayCopy()) {
987 // Disconnect ArrayCopy node
988 ArrayCopyNode* ac = use->as_ArrayCopy();
989 if (ac->is_clonebasic()) {
990 Node* membar_after = ac->proj_out(TypeFunc::Control)->unique_ctrl_out();
991 disconnect_projections(ac, _igvn);
992 assert(alloc->in(TypeFunc::Memory)->is_Proj() && alloc->in(TypeFunc::Memory)->in(0)->Opcode() == Op_MemBarCPUOrder, "mem barrier expected before allocation");
993 Node* membar_before = alloc->in(TypeFunc::Memory)->in(0);
994 disconnect_projections(membar_before->as_MemBar(), _igvn);
995 if (membar_after->is_MemBar()) {
996 disconnect_projections(membar_after->as_MemBar(), _igvn);
997 }
998 } else {
999 assert(ac->is_arraycopy_validated() ||
1000 ac->is_copyof_validated() ||
1001 ac->is_copyofrange_validated(), "unsupported");
1002 CallProjections callprojs;
1003 ac->extract_projections(&callprojs, true);
1004
1005 _igvn.replace_node(callprojs.fallthrough_ioproj, ac->in(TypeFunc::I_O));
1006 _igvn.replace_node(callprojs.fallthrough_memproj, ac->in(TypeFunc::Memory));
1007 _igvn.replace_node(callprojs.fallthrough_catchproj, ac->in(TypeFunc::Control));
1008
1009 // Set control to top. IGVN will remove the remaining projections
1010 ac->set_req(0, top());
1011 ac->replace_edge(res, top());
1012
1013 // Disconnect src right away: it can help find new
1014 // opportunities for allocation elimination
1015 Node* src = ac->in(ArrayCopyNode::Src);
1016 ac->replace_edge(src, top());
1017 // src can be top at this point if src and dest of the
1018 // arraycopy were the same
1019 if (src->outcnt() == 0 && !src->is_top()) {
1020 _igvn.remove_dead_node(src);
1021 }
1022 }
1023 _igvn._worklist.push(ac);
1024 } else {
1025 eliminate_gc_barrier(use);
1026 }
1027 j -= (oc1 - res->outcnt());
1028 }
1029 assert(res->outcnt() == 0, "all uses of allocated objects must be deleted");
1030 _igvn.remove_dead_node(res);
1031 }
1032
1033 eliminate_unused_allocation_edges(alloc);
1034 }
1035
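// Rewire or remove the remaining projections of an allocation that is being removed:
// Initialize nodes hanging off the result projection are eliminated, and the control,
// memory and I/O projections are replaced by the allocation's own inputs.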
1036 void PhaseMacroExpand::eliminate_unused_allocation_edges(CallNode* alloc) {
1037 //
1038 // Process other users of allocation's projections
1039 //
1040 if (_resproj != NULL && _resproj->outcnt() != 0) {
1041 // First disconnect stores captured by Initialize node.
1042 // If Initialize node is eliminated first in the following code,
1043 // it will kill such stores and DUIterator_Last will assert.
1044 for (DUIterator_Fast jmax, j = _resproj->fast_outs(jmax); j < jmax; j++) {
1045 Node *use = _resproj->fast_out(j);
1046 if (use->is_AddP()) {
1047 // raw memory addresses used only by the initialization
1048 _igvn.replace_node(use, C->top());
1049 --j; --jmax;
1050 }
1051 }
1052 for (DUIterator_Last jmin, j = _resproj->last_outs(jmin); j >= jmin; ) {
1053 Node *use = _resproj->last_out(j);
1054 uint oc1 = _resproj->outcnt();
1055 if (use->is_Initialize()) {
1056 // Eliminate Initialize node.
1057 InitializeNode *init = use->as_Initialize();
1058 assert(init->outcnt() <= 2, "only a control and memory projection expected");
1059 Node *ctrl_proj = init->proj_out_or_null(TypeFunc::Control);
1060 if (ctrl_proj != NULL) {
1061 _igvn.replace_node(ctrl_proj, init->in(TypeFunc::Control));
1062 #ifdef ASSERT
1063 Node* tmp = init->in(TypeFunc::Control);
1064 assert(tmp == _fallthroughcatchproj, "allocation control projection");
1065 #endif
1066 }
1067 Node *mem_proj = init->proj_out_or_null(TypeFunc::Memory);
1068 if (mem_proj != NULL) {
1069 Node *mem = init->in(TypeFunc::Memory);
1070 #ifdef ASSERT
1071 if (mem->is_MergeMem()) {
1072 assert(mem->in(TypeFunc::Memory) == _memproj_fallthrough, "allocation memory projection");
1073 } else {
1074 assert(mem == _memproj_fallthrough, "allocation memory projection");
1075 }
1076 #endif
1077 _igvn.replace_node(mem_proj, mem);
1078 }
1079 } else {
1080 assert(false, "only Initialize or AddP expected");
1081 }
1082 j -= (oc1 - _resproj->outcnt());
1083 }
1084 }
1085 if (_fallthroughcatchproj != NULL) {
1086 _igvn.replace_node(_fallthroughcatchproj, alloc->in(TypeFunc::Control));
1087 }
1088 if (_memproj_fallthrough != NULL) {
1089 _igvn.replace_node(_memproj_fallthrough, alloc->in(TypeFunc::Memory));
1090 }
1091 if (_memproj_catchall != NULL) {
1092 _igvn.replace_node(_memproj_catchall, C->top());
1093 }
1094 if (_ioproj_fallthrough != NULL) {
1095 _igvn.replace_node(_ioproj_fallthrough, alloc->in(TypeFunc::I_O));
1096 }
1097 if (_ioproj_catchall != NULL) {
1098 _igvn.replace_node(_ioproj_catchall, C->top());
1099 }
1100 if (_catchallcatchproj != NULL) {
1101 _igvn.replace_node(_catchallcatchproj, C->top());
1102 }
1103 }
1104
1105 #define STACK_REG_BUFFER 4
1106
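// Returns true if a stack slot at 'slot_location' (plus STACK_REG_BUFFER extra slots)
// can still be represented in a RegMask, i.e. there is room to address another stack
// allocated object in the frame.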
1107 bool PhaseMacroExpand::stack_allocation_location_representable(int slot_location) {
1108 // TODO: This is likely not enough, as there are values on the stack above the fixed slots.
1109 // Revisit to see if there is a better check.
1110 OptoReg::Name stack_reg = OptoReg::stack2reg(slot_location + STACK_REG_BUFFER);
1111 if (RegMask::can_represent(stack_reg)) {
1112 return true;
1113 } else {
1114 return false;
1115 }
1116 }
1117
1118 #undef STACK_REG_BUFFER
1119
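// Reserve 'num_slots' stack slots for a stack allocated object. Returns the first slot of
// the reservation and bumps the compile's fixed-slot high water mark, or returns -1 if
// the resulting location cannot be represented.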
1120 int PhaseMacroExpand::next_stack_allocated_object(int num_slots) {
1121 int current = C->fixed_slots();
1122 int next = current + num_slots;
1123 if (!stack_allocation_location_representable(next)) {
1124 return -1;
1125 }
1126 // Keep the toplevel high water mark current:
1127 if (C->fixed_slots() < next) C->set_fixed_slots(next);
1128 return current;
1129 }
1130
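// Eliminate write barriers that apply directly to the stack allocated object and
// unregister any barriers merely reachable from it so that the check elimination code
// does not run on them. Returns true if the allocation's result_cast() became unused.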
1131 bool PhaseMacroExpand::process_write_barriers_on_stack_allocated_objects(AllocateNode* alloc) {
1132 GrowableArray<Node*> barriers;
1133 Node *res = alloc->result_cast();
1134 assert(res != NULL, "result node must not be null");
1135
1136 // Find direct barriers on the stack allocated objects.
1137 // Those we can simply eliminate.
1138 for (DUIterator_Fast imax, i = res->fast_outs(imax); i < imax; i++) {
1139 Node *use = res->fast_out(i);
1140 if (use->Opcode() == Op_CastP2X) {
1141 barriers.append_if_missing(use);
1142 } else if (use->is_AddP()) {
1143 for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
1144 Node *addp_out = use->fast_out(j);
1145 if (addp_out->Opcode() == Op_CastP2X) {
1146 barriers.append_if_missing(addp_out);
1147 }
1148 }
1149 }
1150 }
1151
1152 while (barriers.length() != 0) {
1153 eliminate_gc_barrier(barriers.pop());
1154 }
1155
1156 // After removing the direct barriers, the result may no longer be used.
1157 if (alloc->result_cast() == NULL) {
1158 return true;
1159 }
1160
1161 // Next, walk all uses of the allocate to discover the barriers that
1162 // might be reachable from our allocate. If a barrier is reachable
1163 // from the stack allocated object, we unregister it so that the check
1164 // elimination code doesn't run on it.
1165 VectorSet visited(Thread::current()->resource_area());
1166 GrowableArray<Node*> node_worklist;
1167
1168 BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2();
1169
1170 node_worklist.push(res);
1171
1172 while(node_worklist.length() != 0) {
1173 Node* n = node_worklist.pop();
1174
1175 if (visited.test_set(n->_idx)) {
1176 continue; // already processed
1177 }
1178
1179 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1180 Node *use = n->fast_out(i);
1181 if (use->Opcode() == Op_CastP2X) {
1182 bs->unregister_potential_barrier_node(use);
1183 } else if (use->is_Phi() ||
1184 use->is_CheckCastPP() ||
1185 use->is_EncodeP() ||
1186 use->is_DecodeN() ||
1187 use->is_SafePoint() ||
1188 use->is_Proj() ||
1189 (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) {
1190 // Find barriers beyond our current result
1191 node_worklist.push(use);
1192 } else if (use->is_Store() && use->Opcode() == Op_StoreP) {
1193 if (n != use->in(MemNode::ValueIn)) {
1194 continue;
1195 }
1196 // TODO code copied from escape.cpp::ConnectionGraph::get_addp_base.
1197 // Common up this code into a helper
1198 Node *memory = use->in(MemNode::Address);
1199 if (memory->is_AddP()) {
1200 Node *base = memory->in(AddPNode::Base);
1201 if (base->uncast()->is_top()) { // The AddP case #3 and #6 and #9.
1202 base = memory->in(AddPNode::Address);
1203 while (base->is_AddP()) {
1204 // Case #6 (unsafe access) may have several chained AddP nodes.
1205 assert(base->in(AddPNode::Base)->uncast()->is_top(), "expected unsafe access address only");
1206 base = base->in(AddPNode::Address);
1207 }
1208 if (base->Opcode() == Op_CheckCastPP &&
1209 base->bottom_type()->isa_rawptr() &&
1210 _igvn.type(base->in(1))->isa_oopptr()) {
1211 base = base->in(1); // Case #9
1212 }
1213 }
1214 node_worklist.push(base);
1215 }
1216 } else if (use->is_AddP() ||
1217 (use->is_Load() && use->Opcode() == Op_LoadP)) {
1218 // Find barriers for loads
1219 node_worklist.push(use);
1220 }
1221 }
1222 }
1223 return false;
1224 }
1225
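// Walk from the allocation result to every safepoint that can observe the stack
// allocated object and attach a SafePointScalarObjectNode describing it. Returns true
// if some safepoint was reached through a node other than the direct result.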
1226 bool PhaseMacroExpand::register_stack_allocated_object_with_safepoints(AllocateNode* alloc, Node* stack_oop) {
1227 VectorSet visited(Thread::current()->resource_area());
1228 GrowableArray<Node*> node_worklist;
1229 GrowableArray<SafePointNode*> temp;
1230 Dict* safepoint_map = new Dict(cmpkey, hashkey);
1231 bool found_non_direct_safepoint = false;
1232 Node *res = alloc->result_cast();
1233
1234 assert(res != NULL, "result node must not be null");
1235
1236 node_worklist.push(res);
1237
1238 while(node_worklist.length() != 0) {
1239 Node* n = node_worklist.pop();
1240
1241 if (visited.test_set(n->_idx)) {
1242 continue; // already processed
1243 }
1244
1245 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1246 Node *use = n->fast_out(i);
1247 if (use->is_SafePoint()) {
1248 SafePointNode* sfpt = use->as_SafePoint();
1249 if (sfpt->jvms() != NULL) {
1250 temp.push(sfpt);
1251 }
1252 } else if (use->is_Phi() ||
1253 use->is_CheckCastPP() ||
1254 use->is_EncodeP() ||
1255 use->is_DecodeN() ||
1256 use->is_Proj() ||
1257 (use->Opcode() == Op_CastP2X) ||
1258 use->is_MergeMem() ||
1259 use->is_MemBar() ||
1260 (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) {
1261 // Find safepoints beyond our current result
1262 node_worklist.push(use);
1263 } else if (use->is_Store() && use->Opcode() == Op_StoreP) {
1264 node_worklist.push(use);
1265 if (n != use->in(MemNode::ValueIn)) {
1266 continue;
1267 }
1268 // TODO code copied from escape.cpp::ConnectionGraph::get_addp_base.
1269 // Common up this code into a helper
1270 Node *memory = use->in(MemNode::Address);
1271 if (memory->is_AddP()) {
1272 Node *base = memory->in(AddPNode::Base);
1273 if (base->uncast()->is_top()) { // The AddP case #3 and #6 and #9.
1274 base = memory->in(AddPNode::Address);
1275 while (base->is_AddP()) {
1276 // Case #6 (unsafe access) may have several chained AddP nodes.
1277 assert(base->in(AddPNode::Base)->uncast()->is_top(), "expected unsafe access address only");
1278 base = base->in(AddPNode::Address);
1279 }
1280 if (base->Opcode() == Op_CheckCastPP &&
1281 base->bottom_type()->isa_rawptr() &&
1282 _igvn.type(base->in(1))->isa_oopptr()) {
1283 base = base->in(1); // Case #9
1284 }
1285 }
1286 node_worklist.push(base);
1287 }
1288 } else if (use->is_AddP() ||
1289 (use->is_Load() && use->Opcode() == Op_LoadP)) {
1290 // Find safepoints for arrays
1291 node_worklist.push(use);
1292 }
1293 }
1294
1295 while (temp.length() != 0) {
1296 SafePointNode* sfpt = temp.pop();
1297 if (res != n) {
1298 found_non_direct_safepoint = true;
1299 }
1300 handle_safepoint_for_stack_allocation(safepoint_map, alloc, stack_oop, n, sfpt);
1301 }
1302 }
1303
1304 return found_non_direct_safepoint;
1305 }
1306
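// Create the SafePointScalarObjectNode describing the stack allocated object at 'sfpt'
// (or reuse the one recorded in 'safepoint_map') and wire it into the safepoint's
// debug info.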
1307 void PhaseMacroExpand::handle_safepoint_for_stack_allocation(Dict* safepoint_map, AllocateNode* alloc, Node* oop_node, Node* parent, SafePointNode* sfpt) {
1308 Node* res = alloc->result_cast();
1309 assert(res->is_CheckCastPP(), "unexpected AllocateNode result");
1310 const TypeOopPtr* res_type = _igvn.type(res)->isa_oopptr();
1311 ciKlass* klass = res_type->klass();
1312 int nfields = 0;
1313 if (res_type->isa_instptr()) {
1314 // find the fields of the class which will be needed for safepoint debug information
1315 assert(klass->is_instance_klass(), "must be an instance klass.");
1316 ciInstanceKlass* iklass = klass->as_instance_klass();
1317 nfields = iklass->nof_nonstatic_fields();
1318 } else {
1319 // find the array's elements which will be needed for safepoint debug information
1320 nfields = alloc->in(AllocateNode::ALength)->find_int_con(-1);
1321 }
1322
1323 assert(nfields >= 0, "Sanity");
1324
1325 SafePointScalarObjectNode* sobj = NULL;
1326 Node *result = (Node *)(*safepoint_map)[sfpt];
1327 if (result != NULL) {
1328 assert(result->is_SafePointScalarObject(), "Has to be a safepointscalarobject");
1329 sobj = result->as_SafePointScalarObject();
1330 } else {
1331 //
1332 // Process the safepoint uses
1333 //
1334 Node* mem = sfpt->memory();
1335 Node* ctl = sfpt->control();
1336 assert(sfpt->jvms() != NULL, "missed JVMS");
1337 // Fields of scalar objs are referenced only at the end
1338 // of regular debuginfo at the last (youngest) JVMS.
1339 // Record relative start index.
1340 uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
1341 sobj = new SafePointScalarObjectNode(res_type,
1342 #ifdef ASSERT
1343 alloc,
1344 #endif
1345 first_ind, nfields);
1346 sobj->init_req(0, C->root());
1347 sobj->add_req(oop_node);
1348 transform_later(sobj);
1349 sobj->set_stack_allocated(true);
1350
1351 JVMState *jvms = sfpt->jvms();
1352 sfpt->add_req(sobj);
1353 jvms->set_endoff(sfpt->req());
1354 _igvn._worklist.push(sfpt);
1355 safepoint_map->Insert(sfpt, sobj);
1356 }
1357
1358 if (parent == res) {
1359 adjust_safepoint_jvms(sfpt, parent, sobj);
1360 }
1361 }
1362
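// An allocation can be stack allocated only if it still has a result, escape analysis
// marked it as stack allocateable, its size is a known constant, and stack allocation
// is enabled.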
1363 bool PhaseMacroExpand::can_stack_allocate(AllocateNode* alloc, Node* res, intptr_t size_of_object) {
1364 return ((res != NULL) && alloc->_is_stack_allocateable && (size_of_object != -1) && should_stack_allocate());
1365 }
1366
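// Pre-pass over one allocation: record whether stack allocation has to be abandoned for
// objects with references, and accumulate the number of stack slots that the remaining
// stack allocateable objects will need.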
1367 void PhaseMacroExpand::estimate_stack_allocation_size(AllocateNode* alloc) {
1368 Node* res = alloc->result_cast();
1369 Node* size_in_bytes = alloc->in(AllocateNode::AllocSize);
1370 intptr_t size_of_object = _igvn.find_intptr_t_con(size_in_bytes, -1);
1371
1372 if (alloc->_is_scalar_replaceable && !alloc->_is_stack_allocateable) {
1373 C->set_fail_stack_allocation_with_references(true);
1374 return;
1375 }
1376
1377 bool can_sa = can_stack_allocate(alloc, res, size_of_object);
1378 if (alloc->_is_stack_allocateable && !can_sa) {
1379 // If we marked the object as SA in EA but now we cannot stack allocate it, fail.
1380 C->set_fail_stack_allocation_with_references(true);
1381 return;
1382 }
1383
1384 if (!alloc->_is_stack_allocateable) {
1385 // If we cannot SA because EA said no, then there is no need to count the size.
1386 return;
1387 }
1388
1389 int current = C->stack_allocated_slots();
1390 C->set_stack_allocated_slots(current + (size_of_object >> LogBytesPerInt));
1391 }
1392
1393 // Do stack allocation
1394 bool PhaseMacroExpand::stack_allocation(AllocateNode* alloc) {
1395 Node* klass = alloc->in(AllocateNode::KlassNode);
1396 const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr();
1397 Node *length = (alloc->is_AllocateArray()) ? alloc->in(AllocateNode::ALength) : NULL;
1398 Node* size_in_bytes = alloc->in(AllocateNode::AllocSize);
1399 Node* res = alloc->result_cast();
1400 Node* ctrl = alloc->in(TypeFunc::Control);
1401 Node* mem = alloc->in(TypeFunc::Memory);
1402
1403 intptr_t size_of_object = _igvn.find_intptr_t_con(size_in_bytes, -1);
1404
1405 if (!can_stack_allocate(alloc, res, size_of_object)) {
1406 return false;
1407 }
1408
1409 if (C->fail_stack_allocation_with_references()) {
1410 if (alloc->_is_referenced_stack_allocation) {
1411 #ifndef PRODUCT
1412 if (print_stack_allocation()) {
1413 tty->print_cr("---- Avoiding stack allocation on node %d because it is referenced by another alloc and SCR/SA failed in method %s", alloc->_idx, _igvn.C->method()->get_Method()->name_and_sig_as_C_string());
1414 }
1415 #endif
1416 return false;
1417 }
1418 }
1419
1420 int next_stack_allocation_slot = next_stack_allocated_object(size_of_object >> LogBytesPerInt);
1421 if (next_stack_allocation_slot < 0) {
1422 #ifndef PRODUCT
1423 if (print_stack_allocation()) {
1424 tty->print_cr("---- Avoiding stack allocation on node %d with size %ld for method %s because of insufficient stack space", alloc->_idx, size_of_object, _igvn.C->method()->get_Method()->name_and_sig_as_C_string());
1425 }
1426 #endif
1427 return false;
1428 }
1429
1430 if (mem->is_MergeMem()) {
1431 mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
1432 }
1433
1434 extract_call_projections(alloc);
1435
1436 // Process barriers as this may result in result_cast() becoming NULL
1437 if (process_write_barriers_on_stack_allocated_objects(alloc)) {
1438 #ifndef PRODUCT
1439 if (print_stack_allocation()) {
1440 tty->print_cr("---- Allocation %d result_cast is no longer used so yank the alloc instead", alloc->_idx);
1441 }
1442 #endif
1443 InitializeNode* init = alloc->initialization();
1444 if (init != NULL) {
1445 init->remove(&_igvn);
1446 }
1447 yank_alloc_node(alloc);
1448 return true;
1449 }
1450
1451 assert(res == alloc->result_cast(), "values must match");
1452
1453 Node* stack_oop = transform_later(new BoxLockNode(next_stack_allocation_slot));
1454 Node* new_raw_mem = initialize_object(alloc, ctrl, mem, stack_oop, klass, length, size_in_bytes);
1455
1456 bool non_direct_safepoints = register_stack_allocated_object_with_safepoints(alloc, stack_oop);
1457 if (non_direct_safepoints) {
1458 if (length != NULL) {
1459 stack_allocation_init_array_length_on_entry(alloc, length, stack_oop);
1460 }
1461 #ifndef PRODUCT
1462 stack_allocation_clear_object_data(alloc, stack_oop);
1463 #endif
1464 }
1465
1466 _igvn.replace_node(_resproj, stack_oop);
1467
1468 for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
1469 Node *use = _memproj_fallthrough->fast_out(i);
1470 _igvn.rehash_node_delayed(use);
1471 imax -= replace_input(use, _memproj_fallthrough, new_raw_mem);
1472 // back up iterator
1473 --i;
1474 }
1475
1476 eliminate_unused_allocation_edges(alloc);
1477
1478 assert(_resproj->outcnt() == 0, "all uses of the original allocate result projection must be deleted");
1479 _igvn.remove_dead_node(_resproj);
1480
1481 #ifndef PRODUCT
1482 if (print_stack_allocation()) {
1483 tty->print_cr("++++ Performing stack allocation on node %d with size %ld for method %s", alloc->_idx, size_of_object, _igvn.C->method()->get_Method()->name_and_sig_as_C_string());
1484 }
1485 #endif
1486
1487 return true;
1488 }
1489
1490 /*
1491 Initialize stack allocated array length on entry to the method.
1492 This is required for deopt so it can verify array lengths and so
1493 that GCs that happen after deopt will not crash on uninitialized
1494 arrays.
1495 */
1496 void PhaseMacroExpand::stack_allocation_init_array_length_on_entry(AllocateNode *alloc, Node *length, Node *stack_oop) {
1497 Node* start_mem = C->start()->proj_out_or_null(TypeFunc::Memory);
1498 assert(length != NULL, "Length can not be NULL");
1499
1500 if (C->is_osr_compilation()) {
1501 for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) {
1502 Node *child = start_mem->fast_out(i);
1503 if (child->is_CallLeaf() && child->as_CallLeaf()->is_call_to_osr_migration_end()) {
1504 CallLeafNode* call_leaf = child->as_CallLeaf();
1505 start_mem = call_leaf->proj_out_or_null(TypeFunc::Memory);
1506 break;
1507 }
1508 }
1509 }
1510 assert(start_mem != NULL, "Must find start mem");
1511 Node* init_mem = start_mem;
1512
1513 // need to set the length field for arrays for deopt
1514 init_mem = make_store(C->start()->proj_out_or_null(TypeFunc::Control),
1515 init_mem, stack_oop, arrayOopDesc::length_offset_in_bytes(),
1516 length, T_INT);
1517
1518
1519 if (init_mem != start_mem) {
1520 for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) {
1521 Node *use = start_mem->fast_out(i);
1522 // Compressed refs can make a new store which adjusts the start
1523 // offset and is sourced by start_mem. Make sure we don't create a cycle.
1524 if (use == init_mem || (init_mem->find_edge(use) >= 0)) {
1525 continue;
1526 }
1527 _igvn.rehash_node_delayed(use);
1528 imax -= replace_input(use, start_mem, init_mem);
1529 // back up iterator
1530 --i;
1531 }
1532 }
1533 }
1534
1535 #ifndef PRODUCT
1536 /*
Clear the stack-allocated (SA) object's contents on entry to the method.
This ensures the object looks initialized at safepoints that may only be
reachable through phis, where the object may not actually have been
initialized yet.
1540 */
1541 void PhaseMacroExpand::stack_allocation_clear_object_data(AllocateNode *alloc, Node *stack_oop) {
1542 Node* klass = alloc->in(AllocateNode::KlassNode);
1543 Node *length = (alloc->is_AllocateArray()) ? alloc->in(AllocateNode::ALength) : NULL;
1544 Node* size_in_bytes = alloc->in(AllocateNode::AllocSize);
1545 Node* start_mem = C->start()->proj_out_or_null(TypeFunc::Memory);
1546 if (C->is_osr_compilation()) {
1547 for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) {
1548 Node *child = start_mem->fast_out(i);
1549 if (child->is_CallLeaf() && child->as_CallLeaf()->is_call_to_osr_migration_end()) {
1550 CallLeafNode* call_leaf = child->as_CallLeaf();
1551 start_mem = call_leaf->proj_out_or_null(TypeFunc::Memory);
1552 break;
1553 }
1554 }
1555 }
1556 assert(start_mem != NULL, "Must find start mem");
1557 int header_size = alloc->minimum_header_size();
1558 Node* init_mem = start_mem;
1559 if (length != NULL) {
1560 // conservatively small header size:
1561 header_size = arrayOopDesc::base_offset_in_bytes(T_BYTE);
1562 ciKlass* k = _igvn.type(klass)->is_klassptr()->klass();
1563 if (k->is_array_klass()) { // we know the exact header size in most cases:
1564 header_size = Klass::layout_helper_header_size(k->layout_helper());
1565 }
1566 }
1567 init_mem = ClearArrayNode::clear_memory(C->start()->proj_out_or_null(TypeFunc::Control),
1568 init_mem, stack_oop, header_size, size_in_bytes,
1569 &_igvn);
1570 if (init_mem != start_mem) {
1571 for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) {
1572 Node *use = start_mem->fast_out(i);
      // Compressed refs can introduce a new store which adjusts the start
      // offset and is sourced by start_mem. Make sure we don't create a cycle.
1575 if (use == init_mem || (init_mem->find_edge(use) >= 0)) {
1576 continue;
1577 }
1578 _igvn.rehash_node_delayed(use);
1579 imax -= replace_input(use, start_mem, init_mem);
1580 // back up iterator
1581 --i;
1582 }
1583 }
1584 }
1585 #endif
1586
1587 bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) {
1588 // Don't do scalar replacement if the frame can be popped by JVMTI:
1589 // if reallocation fails during deoptimization we'll pop all
1590 // interpreter frames for this compiled frame and that won't play
1591 // nice with JVMTI popframe.
1592 if (!EliminateAllocations || JvmtiExport::can_pop_frame() || !alloc->_is_non_escaping) {
1593 return false;
1594 }
1595 Node* klass = alloc->in(AllocateNode::KlassNode);
1596 const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr();
1597 Node* res = alloc->result_cast();
1598 // Eliminate boxing allocations which are not used
  // regardless of their scalar-replaceable status.
1600 bool boxing_alloc = C->eliminate_boxing() &&
1601 tklass->klass()->is_instance_klass() &&
1602 tklass->klass()->as_instance_klass()->is_box_klass();
1603 if (!alloc->_is_scalar_replaceable && (!boxing_alloc || (res != NULL))) {
1604 return false;
1605 }
1606
1607 extract_call_projections(alloc);
1608
1609 GrowableArray <SafePointNode *> safepoints;
1610 if (!can_eliminate_allocation(alloc, safepoints)) {
1611 return false;
1612 }
1613
1614 if (!alloc->_is_scalar_replaceable) {
1615 assert(res == NULL, "sanity");
1616 // We can only eliminate allocation if all debug info references
1617 // are already replaced with SafePointScalarObject because
    // we can't search for a field's value without an instance_id.
1619 if (safepoints.length() > 0) {
1620 return false;
1621 }
1622 }
1623
1624 if (!scalar_replacement(alloc, safepoints)) {
1625 return false;
1626 }
1627
1628 CompileLog* log = C->log();
1629 if (log != NULL) {
1630 log->head("eliminate_allocation type='%d'",
1631 log->identify(tklass->klass()));
1632 JVMState* p = alloc->jvms();
1633 while (p != NULL) {
1634 log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
1635 p = p->caller();
1636 }
1637 log->tail("eliminate_allocation");
1638 }
1639
1640 process_users_of_allocation(alloc);
1641
1642 #ifndef PRODUCT
1643 if (print_eliminate_allocations()) {
1644 if (alloc->is_AllocateArray())
1645 tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx);
1646 else
1647 tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx);
1648 }
1649 #endif
1650
1651 return true;
1652 }
1653
1654 bool PhaseMacroExpand::eliminate_boxing_node(CallStaticJavaNode *boxing) {
1655 // EA should remove all uses of non-escaping boxing node.
1656 if (!C->eliminate_boxing() || boxing->proj_out_or_null(TypeFunc::Parms) != NULL) {
1657 return false;
1658 }
1659
1660 assert(boxing->result_cast() == NULL, "unexpected boxing node result");
1661
1662 extract_call_projections(boxing);
1663
1664 const TypeTuple* r = boxing->tf()->range();
1665 assert(r->cnt() > TypeFunc::Parms, "sanity");
1666 const TypeInstPtr* t = r->field_at(TypeFunc::Parms)->isa_instptr();
1667 assert(t != NULL, "sanity");
1668
1669 CompileLog* log = C->log();
1670 if (log != NULL) {
1671 log->head("eliminate_boxing type='%d'",
1672 log->identify(t->klass()));
1673 JVMState* p = boxing->jvms();
1674 while (p != NULL) {
1675 log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
1676 p = p->caller();
1677 }
1678 log->tail("eliminate_boxing");
1679 }
1680
1681 process_users_of_allocation(boxing);
1682
1683 #ifndef PRODUCT
1684 if (print_eliminate_allocations()) {
1685 tty->print("++++ Eliminated: %d ", boxing->_idx);
1686 boxing->method()->print_short_name(tty);
1687 tty->cr();
1688 }
1689 #endif
1690
1691 return true;
1692 }
1693
1694 //---------------------------set_eden_pointers-------------------------
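// Compute the addresses of the allocation top and end words: the TLAB fields
// of the current thread when UseTLAB is on, otherwise the shared eden
// top/end words of the heap.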
1695 void PhaseMacroExpand::set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr) {
1696 if (UseTLAB) { // Private allocation: load from TLS
1697 Node* thread = transform_later(new ThreadLocalNode());
1698 int tlab_top_offset = in_bytes(JavaThread::tlab_top_offset());
1699 int tlab_end_offset = in_bytes(JavaThread::tlab_end_offset());
1700 eden_top_adr = basic_plus_adr(top()/*not oop*/, thread, tlab_top_offset);
1701 eden_end_adr = basic_plus_adr(top()/*not oop*/, thread, tlab_end_offset);
1702 } else { // Shared allocation: load from globals
1703 CollectedHeap* ch = Universe::heap();
1704 address top_adr = (address)ch->top_addr();
1705 address end_adr = (address)ch->end_addr();
1706 eden_top_adr = makecon(TypeRawPtr::make(top_adr));
1707 eden_end_adr = basic_plus_adr(eden_top_adr, end_adr - top_adr);
1708 }
1709 }
1710
1711
1712 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
1713 Node* adr = basic_plus_adr(base, offset);
1714 const TypePtr* adr_type = adr->bottom_type()->is_ptr();
1715 Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt, MemNode::unordered);
1716 transform_later(value);
1717 return value;
1718 }
1719
1720
1721 Node* PhaseMacroExpand::make_store(Node* ctl, Node* mem, Node* base, int offset, Node* value, BasicType bt) {
1722 Node* adr = basic_plus_adr(base, offset);
1723 mem = StoreNode::make(_igvn, ctl, mem, adr, NULL, value, bt, MemNode::unordered);
1724 transform_later(mem);
1725 return mem;
1726 }
1727
1728 //=============================================================================
1729 //
1730 // A L L O C A T I O N
1731 //
1732 // Allocation attempts to be fast in the case of frequent small objects.
1733 // It breaks down like this:
1734 //
1735 // 1) Size in doublewords is computed. This is a constant for objects and
1736 // variable for most arrays. Doubleword units are used to avoid size
1737 // overflow of huge doubleword arrays. We need doublewords in the end for
1738 // rounding.
1739 //
1740 // 2) Size is checked for being 'too large'. Too-large allocations will go
1741 // the slow path into the VM. The slow path can throw any required
1742 // exceptions, and does all the special checks for very large arrays. The
1743 // size test can constant-fold away for objects. For objects with
// finalizers it constant-folds the other way: you always go slow with
1745 // finalizers.
1746 //
1747 // 3) If NOT using TLABs, this is the contended loop-back point.
1748 // Load-Locked the heap top. If using TLABs normal-load the heap top.
1749 //
// 4) Check that heap top + size*8 < max.  If we fail go the slow route.
1751 // NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish
1752 // "size*8" we always enter the VM, where "largish" is a constant picked small
1753 // enough that there's always space between the eden max and 4Gig (old space is
1754 // there so it's quite large) and large enough that the cost of entering the VM
1755 // is dwarfed by the cost to initialize the space.
1756 //
1757 // 5) If NOT using TLABs, Store-Conditional the adjusted heap top back
1758 // down. If contended, repeat at step 3. If using TLABs normal-store
1759 // adjusted heap top back down; there is no contention.
1760 //
1761 // 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in klass & mark
1762 // fields.
1763 //
1764 // 7) Merge with the slow-path; cast the raw memory pointer to the correct
1765 // oop flavor.
1766 //
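// Putting steps 1-7 together, the generated fast path is roughly the
// following (a sketch of the TLAB case, not the exact node shape):
//
//   if (size is too large)            goto slow_path;  // steps 1-2
//   top     = thread->tlab_top();                      // step 3
//   new_top = top + size_in_bytes;
//   if (new_top > thread->tlab_end()) goto slow_path;  // step 4
//   thread->set_tlab_top(new_top);                     // step 5
//   if (!ZeroTLAB) clear(top, size_in_bytes);          // step 6
//   store mark word and klass into the new object;     // step 6
//   merge with the slow-path result and cast to oop;   // step 7
//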
1767 //=============================================================================
1768 // FastAllocateSizeLimit value is in DOUBLEWORDS.
1769 // Allocations bigger than this always go the slow route.
1770 // This value must be small enough that allocation attempts that need to
1771 // trigger exceptions go the slow route. Also, it must be small enough so
1772 // that heap_top + size_in_bytes does not wrap around the 4Gig limit.
//=============================================================================
1774 // %%% Here is an old comment from parseHelper.cpp; is it outdated?
1775 // The allocator will coalesce int->oop copies away. See comment in
1776 // coalesce.cpp about how this works. It depends critically on the exact
1777 // code shape produced here, so if you are changing this code shape
1778 // make sure the GC info for the heap-top is correct in and around the
1779 // slow-path call.
1780 //
1781
1782 void PhaseMacroExpand::expand_allocate_common(
1783 AllocateNode* alloc, // allocation node to be expanded
1784 Node* length, // array length for an array allocation
1785 const TypeFunc* slow_call_type, // Type of slow call
1786 address slow_call_address // Address of slow call
1787 )
1788 {
1789 Node* ctrl = alloc->in(TypeFunc::Control);
1790 Node* mem = alloc->in(TypeFunc::Memory);
1791 Node* i_o = alloc->in(TypeFunc::I_O);
1792 Node* size_in_bytes = alloc->in(AllocateNode::AllocSize);
1793 Node* klass_node = alloc->in(AllocateNode::KlassNode);
1794 Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
1795 assert(ctrl != NULL, "must have control");
1796
1797 // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
  // They will not be used if only the slow path is expanded.
1799 enum { slow_result_path = 1, fast_result_path = 2 };
1800 Node *result_region = NULL;
1801 Node *result_phi_rawmem = NULL;
1802 Node *result_phi_rawoop = NULL;
1803 Node *result_phi_i_o = NULL;
1804
1805 // The initial slow comparison is a size check, the comparison
1806 // we want to do is a BoolTest::gt
1807 bool expand_fast_path = true;
1808 int tv = _igvn.find_int_con(initial_slow_test, -1);
1809 if (tv >= 0) {
1810 // InitialTest has constant result
1811 // 0 - can fit in TLAB
1812 // 1 - always too big or negative
1813 assert(tv <= 1, "0 or 1 if a constant");
1814 expand_fast_path = (tv == 0);
1815 initial_slow_test = NULL;
1816 } else {
1817 initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn);
1818 }
1819
1820 if (C->env()->dtrace_alloc_probes() ||
1821 (!UseTLAB && !Universe::heap()->supports_inline_contig_alloc())) {
1822 // Force slow-path allocation
1823 expand_fast_path = false;
1824 initial_slow_test = NULL;
1825 }
1826
1827 bool allocation_has_use = (alloc->result_cast() != NULL);
1828 if (!allocation_has_use) {
1829 InitializeNode* init = alloc->initialization();
1830 if (init != NULL) {
1831 init->remove(&_igvn);
1832 }
1833 if (expand_fast_path && (initial_slow_test == NULL)) {
1834 // Remove allocation node and return.
1835 // Size is a non-negative constant -> no initial check needed -> directly to fast path.
1836 // Also, no usages -> empty fast path -> no fall out to slow path -> nothing left.
1837 #ifndef PRODUCT
1838 if (PrintEliminateAllocations) {
1839 tty->print("NotUsed ");
1840 Node* res = alloc->proj_out_or_null(TypeFunc::Parms);
1841 if (res != NULL) {
1842 res->dump();
1843 } else {
1844 alloc->dump();
1845 }
1846 }
1847 #endif
1848 yank_alloc_node(alloc);
1849 return;
1850 }
1851 }
1852
1853 enum { too_big_or_final_path = 1, need_gc_path = 2 };
1854 Node *slow_region = NULL;
1855 Node *toobig_false = ctrl;
1856
1857 // generate the initial test if necessary
1858 if (initial_slow_test != NULL ) {
1859 assert (expand_fast_path, "Only need test if there is a fast path");
1860 slow_region = new RegionNode(3);
1861
1862 // Now make the initial failure test. Usually a too-big test but
1863 // might be a TRUE for finalizers or a fancy class check for
1864 // newInstance0.
1865 IfNode *toobig_iff = new IfNode(ctrl, initial_slow_test, PROB_MIN, COUNT_UNKNOWN);
1866 transform_later(toobig_iff);
1867 // Plug the failing-too-big test into the slow-path region
1868 Node *toobig_true = new IfTrueNode( toobig_iff );
1869 transform_later(toobig_true);
1870 slow_region ->init_req( too_big_or_final_path, toobig_true );
1871 toobig_false = new IfFalseNode( toobig_iff );
1872 transform_later(toobig_false);
1873 } else {
1874 // No initial test, just fall into next case
1875 assert(allocation_has_use || !expand_fast_path, "Should already have been handled");
1876 toobig_false = ctrl;
1877 debug_only(slow_region = NodeSentinel);
1878 }
1879
1880 // If we are here there are several possibilities
1881 // - expand_fast_path is false - then only a slow path is expanded. That's it.
1882 // no_initial_check means a constant allocation.
1883 // - If check always evaluates to false -> expand_fast_path is false (see above)
1884 // - If check always evaluates to true -> directly into fast path (but may bailout to slowpath)
1885 // if !allocation_has_use the fast path is empty
1886 // if !allocation_has_use && no_initial_check
  //     - Then there is no fast path that can fall out to the slow path -> no allocation
  //       code at all; it was removed by yank_alloc_node above.
1889
1890 Node *slow_mem = mem; // save the current memory state for slow path
1891 // generate the fast allocation code unless we know that the initial test will always go slow
1892 if (expand_fast_path) {
1893 // Fast path modifies only raw memory.
1894 if (mem->is_MergeMem()) {
1895 mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
1896 }
1897
1898 // allocate the Region and Phi nodes for the result
1899 result_region = new RegionNode(3);
1900 result_phi_rawmem = new PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
1901 result_phi_i_o = new PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
1902
1903 // Grab regular I/O before optional prefetch may change it.
1904 // Slow-path does no I/O so just set it to the original I/O.
1905 result_phi_i_o->init_req(slow_result_path, i_o);
1906
1907 // Name successful fast-path variables
1908 Node* fast_oop_ctrl;
1909 Node* fast_oop_rawmem;
1910 if (allocation_has_use) {
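      // Delegate the actual bump-pointer allocation to the GC's BarrierSetC2.
      // On the fast path it returns the raw oop together with the updated
      // control and raw memory, and fills in needgc_ctrl with the control
      // path to take when a GC is required.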
1911 Node* needgc_ctrl = NULL;
1912 result_phi_rawoop = new PhiNode(result_region, TypeRawPtr::BOTTOM);
1913
1914 intx prefetch_lines = length != NULL ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
1915 BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
1916 Node* fast_oop = bs->obj_allocate(this, ctrl, mem, toobig_false, size_in_bytes, i_o, needgc_ctrl,
1917 fast_oop_ctrl, fast_oop_rawmem,
1918 prefetch_lines);
1919
1920 if (initial_slow_test != NULL) {
1921 // This completes all paths into the slow merge point
1922 slow_region->init_req(need_gc_path, needgc_ctrl);
1923 transform_later(slow_region);
1924 } else {
1925 // No initial slow path needed!
1926 // Just fall from the need-GC path straight into the VM call.
1927 slow_region = needgc_ctrl;
1928 }
1929
1930 InitializeNode* init = alloc->initialization();
1931 fast_oop_rawmem = initialize_object(alloc,
1932 fast_oop_ctrl, fast_oop_rawmem, fast_oop,
1933 klass_node, length, size_in_bytes);
1934 expand_initialize_membar(alloc, init, fast_oop_ctrl, fast_oop_rawmem);
1935 expand_dtrace_alloc_probe(alloc, fast_oop, fast_oop_ctrl, fast_oop_rawmem);
1936
1937 result_phi_rawoop->init_req(fast_result_path, fast_oop);
1938 } else {
1939 assert (initial_slow_test != NULL, "sanity");
1940 fast_oop_ctrl = toobig_false;
1941 fast_oop_rawmem = mem;
1942 transform_later(slow_region);
1943 }
1944
1945 // Plug in the successful fast-path into the result merge point
1946 result_region ->init_req(fast_result_path, fast_oop_ctrl);
1947 result_phi_i_o ->init_req(fast_result_path, i_o);
1948 result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem);
1949 } else {
1950 slow_region = ctrl;
1951 result_phi_i_o = i_o; // Rename it to use in the following code.
1952 }
1953
1954 // Generate slow-path call
1955 CallNode *call = new CallStaticJavaNode(slow_call_type, slow_call_address,
1956 OptoRuntime::stub_name(slow_call_address),
1957 alloc->jvms()->bci(),
1958 TypePtr::BOTTOM);
1959 call->init_req(TypeFunc::Control, slow_region);
1960 call->init_req(TypeFunc::I_O, top()); // does no i/o
1961 call->init_req(TypeFunc::Memory, slow_mem); // may gc ptrs
1962 call->init_req(TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr));
1963 call->init_req(TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr));
1964
1965 call->init_req(TypeFunc::Parms+0, klass_node);
1966 if (length != NULL) {
1967 call->init_req(TypeFunc::Parms+1, length);
1968 }
1969
1970 // Copy debug information and adjust JVMState information, then replace
1971 // allocate node with the call
1972 copy_call_debug_info((CallNode *) alloc, call);
1973 if (expand_fast_path) {
1974 call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
1975 } else {
1976 // Hook i_o projection to avoid its elimination during allocation
1977 // replacement (when only a slow call is generated).
1978 call->set_req(TypeFunc::I_O, result_phi_i_o);
1979 }
1980 _igvn.replace_node(alloc, call);
1981 transform_later(call);
1982
1983 // Identify the output projections from the allocate node and
1984 // adjust any references to them.
1985 // The control and io projections look like:
1986 //
1987 // v---Proj(ctrl) <-----+ v---CatchProj(ctrl)
1988 // Allocate Catch
1989 // ^---Proj(io) <-------+ ^---CatchProj(io)
1990 //
1991 // We are interested in the CatchProj nodes.
1992 //
1993 extract_call_projections(call);
1994
1995 // An allocate node has separate memory projections for the uses on
1996 // the control and i_o paths. Replace the control memory projection with
1997 // result_phi_rawmem (unless we are only generating a slow call when
1998 // both memory projections are combined)
1999 if (expand_fast_path && _memproj_fallthrough != NULL) {
2000 migrate_outs(_memproj_fallthrough, result_phi_rawmem);
2001 }
2002 // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete
2003 // _memproj_catchall so we end up with a call that has only 1 memory projection.
2004 if (_memproj_catchall != NULL ) {
2005 if (_memproj_fallthrough == NULL) {
2006 _memproj_fallthrough = new ProjNode(call, TypeFunc::Memory);
2007 transform_later(_memproj_fallthrough);
2008 }
2009 migrate_outs(_memproj_catchall, _memproj_fallthrough);
2010 _igvn.remove_dead_node(_memproj_catchall);
2011 }
2012
2013 // An allocate node has separate i_o projections for the uses on the control
2014 // and i_o paths. Always replace the control i_o projection with result i_o
  // otherwise the incoming i_o becomes dead when only a slow call is generated
2016 // (it is different from memory projections where both projections are
2017 // combined in such case).
2018 if (_ioproj_fallthrough != NULL) {
2019 migrate_outs(_ioproj_fallthrough, result_phi_i_o);
2020 }
2021 // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete
2022 // _ioproj_catchall so we end up with a call that has only 1 i_o projection.
2023 if (_ioproj_catchall != NULL ) {
2024 if (_ioproj_fallthrough == NULL) {
2025 _ioproj_fallthrough = new ProjNode(call, TypeFunc::I_O);
2026 transform_later(_ioproj_fallthrough);
2027 }
2028 migrate_outs(_ioproj_catchall, _ioproj_fallthrough);
2029 _igvn.remove_dead_node(_ioproj_catchall);
2030 }
2031
2032 // if we generated only a slow call, we are done
2033 if (!expand_fast_path) {
2034 // Now we can unhook i_o.
2035 if (result_phi_i_o->outcnt() > 1) {
2036 call->set_req(TypeFunc::I_O, top());
2037 } else {
2038 assert(result_phi_i_o->unique_ctrl_out() == call, "sanity");
      // Case of a new array with a negative size known during compilation.
      // The AllocateArrayNode::Ideal() optimization disconnects the unreachable
      // code that follows, since the call into the runtime will throw an exception.
      // As a result there will be no users of i_o after the call.
      // Leave i_o attached to this call to avoid problems in the preceding graph.
2044 }
2045 return;
2046 }
2047
2048 if (_fallthroughcatchproj != NULL) {
2049 ctrl = _fallthroughcatchproj->clone();
2050 transform_later(ctrl);
2051 _igvn.replace_node(_fallthroughcatchproj, result_region);
2052 } else {
2053 ctrl = top();
2054 }
2055 Node *slow_result;
2056 if (_resproj == NULL) {
2057 // no uses of the allocation result
2058 slow_result = top();
2059 } else {
2060 slow_result = _resproj->clone();
2061 transform_later(slow_result);
2062 _igvn.replace_node(_resproj, result_phi_rawoop);
2063 }
2064
2065 // Plug slow-path into result merge point
2066 result_region->init_req( slow_result_path, ctrl);
2067 transform_later(result_region);
2068 if (allocation_has_use) {
2069 result_phi_rawoop->init_req(slow_result_path, slow_result);
2070 transform_later(result_phi_rawoop);
2071 }
2072 result_phi_rawmem->init_req(slow_result_path, _memproj_fallthrough);
2073 transform_later(result_phi_rawmem);
2074 transform_later(result_phi_i_o);
2075 // This completes all paths into the result merge point
2076 }
2077
2078 // Remove alloc node that has no uses.
2079 void PhaseMacroExpand::yank_alloc_node(AllocateNode* alloc) {
2080 Node* ctrl = alloc->in(TypeFunc::Control);
2081 Node* mem = alloc->in(TypeFunc::Memory);
2082 Node* i_o = alloc->in(TypeFunc::I_O);
2083
2084 extract_call_projections(alloc);
2085 if (_resproj != NULL) {
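    // Any remaining users of the result projection are expected to be
    // MemBar nodes (e.g. the InitializeNode); remove them.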
2086 for (DUIterator_Fast imax, i = _resproj->fast_outs(imax); i < imax; i++) {
2087 Node* use = _resproj->fast_out(i);
2088 use->isa_MemBar()->remove(&_igvn);
2089 --imax;
2090 --i; // back up iterator
2091 }
2092 assert(_resproj->outcnt() == 0, "all uses must be deleted");
2093 _igvn.remove_dead_node(_resproj);
2094 }
2095 if (_fallthroughcatchproj != NULL) {
2096 migrate_outs(_fallthroughcatchproj, ctrl);
2097 _igvn.remove_dead_node(_fallthroughcatchproj);
2098 }
2099 if (_catchallcatchproj != NULL) {
2100 _igvn.rehash_node_delayed(_catchallcatchproj);
2101 _catchallcatchproj->set_req(0, top());
2102 }
2103 if (_fallthroughproj != NULL) {
2104 Node* catchnode = _fallthroughproj->unique_ctrl_out();
2105 _igvn.remove_dead_node(catchnode);
2106 _igvn.remove_dead_node(_fallthroughproj);
2107 }
2108 if (_memproj_fallthrough != NULL) {
2109 migrate_outs(_memproj_fallthrough, mem);
2110 _igvn.remove_dead_node(_memproj_fallthrough);
2111 }
2112 if (_ioproj_fallthrough != NULL) {
2113 migrate_outs(_ioproj_fallthrough, i_o);
2114 _igvn.remove_dead_node(_ioproj_fallthrough);
2115 }
2116 if (_memproj_catchall != NULL) {
2117 _igvn.rehash_node_delayed(_memproj_catchall);
2118 _memproj_catchall->set_req(0, top());
2119 }
2120 if (_ioproj_catchall != NULL) {
2121 _igvn.rehash_node_delayed(_ioproj_catchall);
2122 _ioproj_catchall->set_req(0, top());
2123 }
2124 #ifndef PRODUCT
2125 if (PrintEliminateAllocations) {
2126 if (alloc->is_AllocateArray()) {
2127 tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx);
2128 } else {
2129 tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx);
2130 }
2131 }
2132 #endif
2133 _igvn.remove_dead_node(alloc);
2134 }
2135
2136 void PhaseMacroExpand::expand_initialize_membar(AllocateNode* alloc, InitializeNode* init,
2137 Node*& fast_oop_ctrl, Node*& fast_oop_rawmem) {
2138 // If initialization is performed by an array copy, any required
2139 // MemBarStoreStore was already added. If the object does not
2140 // escape no need for a MemBarStoreStore. If the object does not
2141 // escape in its initializer and memory barrier (MemBarStoreStore or
2142 // stronger) is already added at exit of initializer, also no need
2143 // for a MemBarStoreStore. Otherwise we need a MemBarStoreStore
2144 // so that stores that initialize this object can't be reordered
2145 // with a subsequent store that makes this object accessible by
2146 // other threads.
2147 // Other threads include java threads and JVM internal threads
2148 // (for example concurrent GC threads). Current concurrent GC
  // implementation: G1 will not scan a newly created object,
2150 // so it's safe to skip storestore barrier when allocation does
2151 // not escape.
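  // For illustration (a sketch of the reordering being prevented):
  //   obj->field = value;  // initializing store
  //   shared     = obj;    // publishing store, makes obj visible to others
  // Without a StoreStore barrier between the two, another thread could
  // observe 'shared' pointing at an object whose fields are not yet
  // initialized.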
2152 if (!alloc->does_not_escape_thread() &&
2153 !alloc->is_allocation_MemBar_redundant() &&
2154 (init == NULL || !init->is_complete_with_arraycopy())) {
2155 if (init == NULL || init->req() < InitializeNode::RawStores) {
2156 // No InitializeNode or no stores captured by zeroing
2157 // elimination. Simply add the MemBarStoreStore after object
2158 // initialization.
2159 MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot);
2160 transform_later(mb);
2161
2162 mb->init_req(TypeFunc::Memory, fast_oop_rawmem);
2163 mb->init_req(TypeFunc::Control, fast_oop_ctrl);
2164 fast_oop_ctrl = new ProjNode(mb, TypeFunc::Control);
2165 transform_later(fast_oop_ctrl);
2166 fast_oop_rawmem = new ProjNode(mb, TypeFunc::Memory);
2167 transform_later(fast_oop_rawmem);
2168 } else {
2169 // Add the MemBarStoreStore after the InitializeNode so that
2170 // all stores performing the initialization that were moved
2171 // before the InitializeNode happen before the storestore
2172 // barrier.
2173
2174 Node* init_ctrl = init->proj_out_or_null(TypeFunc::Control);
2175 Node* init_mem = init->proj_out_or_null(TypeFunc::Memory);
2176
2177 MemBarNode* mb = MemBarNode::make(C, Op_MemBarStoreStore, Compile::AliasIdxBot);
2178 transform_later(mb);
2179
2180 Node* ctrl = new ProjNode(init, TypeFunc::Control);
2181 transform_later(ctrl);
2182 Node* mem = new ProjNode(init, TypeFunc::Memory);
2183 transform_later(mem);
2184
2185 // The MemBarStoreStore depends on control and memory coming
2186 // from the InitializeNode
2187 mb->init_req(TypeFunc::Memory, mem);
2188 mb->init_req(TypeFunc::Control, ctrl);
2189
2190 ctrl = new ProjNode(mb, TypeFunc::Control);
2191 transform_later(ctrl);
2192 mem = new ProjNode(mb, TypeFunc::Memory);
2193 transform_later(mem);
2194
2195 // All nodes that depended on the InitializeNode for control
2196 // and memory must now depend on the MemBarNode that itself
2197 // depends on the InitializeNode
2198 if (init_ctrl != NULL) {
2199 _igvn.replace_node(init_ctrl, ctrl);
2200 }
2201 if (init_mem != NULL) {
2202 _igvn.replace_node(init_mem, mem);
2203 }
2204 }
2205 }
2206 }
2207
2208 void PhaseMacroExpand::expand_dtrace_alloc_probe(AllocateNode* alloc, Node* oop,
2209 Node*& ctrl, Node*& rawmem) {
2210 if (C->env()->dtrace_extended_probes()) {
2211 // Slow-path call
2212 int size = TypeFunc::Parms + 2;
2213 CallLeafNode *call = new CallLeafNode(OptoRuntime::dtrace_object_alloc_Type(),
2214 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc_base),
2215 "dtrace_object_alloc",
2216 TypeRawPtr::BOTTOM);
2217
2218 // Get base of thread-local storage area
2219 Node* thread = new ThreadLocalNode();
2220 transform_later(thread);
2221
2222 call->init_req(TypeFunc::Parms + 0, thread);
2223 call->init_req(TypeFunc::Parms + 1, oop);
2224 call->init_req(TypeFunc::Control, ctrl);
2225 call->init_req(TypeFunc::I_O , top()); // does no i/o
2226 call->init_req(TypeFunc::Memory , ctrl);
2227 call->init_req(TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr));
2228 call->init_req(TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr));
2229 transform_later(call);
2230 ctrl = new ProjNode(call, TypeFunc::Control);
2231 transform_later(ctrl);
2232 rawmem = new ProjNode(call, TypeFunc::Memory);
2233 transform_later(rawmem);
2234 }
2235 }
2236
2237 // Helper for PhaseMacroExpand::expand_allocate_common.
2238 // Initializes the newly-allocated storage.
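// It stores the object header (mark word, klass pointer and, for arrays, the
// length field) and then, where needed, zeroes the part of the body that is
// not covered by captured initializing stores.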
2239 Node*
2240 PhaseMacroExpand::initialize_object(AllocateNode* alloc,
2241 Node* control, Node* rawmem, Node* object,
2242 Node* klass_node, Node* length,
2243 Node* size_in_bytes) {
2244 InitializeNode* init = alloc->initialization();
2245 // Store the klass & mark bits
2246 Node* mark_node = alloc->make_ideal_mark(&_igvn, object, control, rawmem);
2247 if (!mark_node->is_Con()) {
2248 transform_later(mark_node);
2249 }
2250 rawmem = make_store(control, rawmem, object, oopDesc::mark_offset_in_bytes(), mark_node, TypeX_X->basic_type());
2251
2252 rawmem = make_store(control, rawmem, object, oopDesc::klass_offset_in_bytes(), klass_node, T_METADATA);
2253 int header_size = alloc->minimum_header_size(); // conservatively small
2254
2255 // Array length
2256 if (length != NULL) { // Arrays need length field
2257 rawmem = make_store(control, rawmem, object, arrayOopDesc::length_offset_in_bytes(), length, T_INT);
2258 // conservatively small header size:
2259 header_size = arrayOopDesc::base_offset_in_bytes(T_BYTE);
2260 ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();
2261 if (k->is_array_klass()) // we know the exact header size in most cases:
2262 header_size = Klass::layout_helper_header_size(k->layout_helper());
2263 }
2264
2265 // Clear the object body, if necessary.
2266 if (init == NULL) {
2267 // The init has somehow disappeared; be cautious and clear everything.
2268 //
2269 // This can happen if a node is allocated but an uncommon trap occurs
2270 // immediately. In this case, the Initialize gets associated with the
2271 // trap, and may be placed in a different (outer) loop, if the Allocate
2272 // is in a loop. If (this is rare) the inner loop gets unrolled, then
2273 // there can be two Allocates to one Initialize. The answer in all these
2274 // edge cases is safety first. It is always safe to clear immediately
2275 // within an Allocate, and then (maybe or maybe not) clear some more later.
2276 if (!(UseTLAB && ZeroTLAB)) {
2277 rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
2278 header_size, size_in_bytes,
2279 &_igvn);
2280 }
2281 } else {
2282 if (!init->is_complete()) {
2283 // Try to win by zeroing only what the init does not store.
2284 // We can also try to do some peephole optimizations,
2285 // such as combining some adjacent subword stores.
2286 rawmem = init->complete_stores(control, rawmem, object,
2287 header_size, size_in_bytes, &_igvn);
2288 }
2289 // We have no more use for this link, since the AllocateNode goes away:
2290 init->set_req(InitializeNode::RawAddress, top());
2291 // (If we keep the link, it just confuses the register allocator,
2292 // who thinks he sees a real use of the address by the membar.)
2293 }
2294
2295 return rawmem;
2296 }
2297
2298 // Generate prefetch instructions for next allocations.
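// Three shapes are generated below depending on AllocatePrefetchStyle:
//   2 - TLAB watermark-based prefetch: prefetch only when the allocation
//       crosses a per-thread watermark, then advance the watermark.
//   3 - one prefetch per cache line, based at an aligned address past the
//       old allocation top.
//   otherwise (style > 0) - unconditional prefetches at increasing distances
//       past the new allocation top, on the fast path only.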
2299 Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
2300 Node*& contended_phi_rawmem,
2301 Node* old_eden_top, Node* new_eden_top,
2302 intx lines) {
2303 enum { fall_in_path = 1, pf_path = 2 };
2304 if( UseTLAB && AllocatePrefetchStyle == 2 ) {
2305 // Generate prefetch allocation with watermark check.
2306 // As an allocation hits the watermark, we will prefetch starting
2307 // at a "distance" away from watermark.
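    //
    // Roughly: tlab_pf_top is a per-thread watermark; once new_eden_top
    // reaches it we issue 'lines' prefetches starting AllocatePrefetchDistance
    // beyond the old watermark and store the advanced watermark back.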
2308
2309 Node *pf_region = new RegionNode(3);
2310 Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
2311 TypeRawPtr::BOTTOM );
2312 // I/O is used for Prefetch
2313 Node *pf_phi_abio = new PhiNode( pf_region, Type::ABIO );
2314
2315 Node *thread = new ThreadLocalNode();
2316 transform_later(thread);
2317
2318 Node *eden_pf_adr = new AddPNode( top()/*not oop*/, thread,
2319 _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
2320 transform_later(eden_pf_adr);
2321
2322 Node *old_pf_wm = new LoadPNode(needgc_false,
2323 contended_phi_rawmem, eden_pf_adr,
2324 TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM,
2325 MemNode::unordered);
2326 transform_later(old_pf_wm);
2327
2328 // check against new_eden_top
2329 Node *need_pf_cmp = new CmpPNode( new_eden_top, old_pf_wm );
2330 transform_later(need_pf_cmp);
2331 Node *need_pf_bol = new BoolNode( need_pf_cmp, BoolTest::ge );
2332 transform_later(need_pf_bol);
2333 IfNode *need_pf_iff = new IfNode( needgc_false, need_pf_bol,
2334 PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN );
2335 transform_later(need_pf_iff);
2336
2337 // true node, add prefetchdistance
2338 Node *need_pf_true = new IfTrueNode( need_pf_iff );
2339 transform_later(need_pf_true);
2340
2341 Node *need_pf_false = new IfFalseNode( need_pf_iff );
2342 transform_later(need_pf_false);
2343
2344 Node *new_pf_wmt = new AddPNode( top(), old_pf_wm,
2345 _igvn.MakeConX(AllocatePrefetchDistance) );
2346 transform_later(new_pf_wmt );
2347 new_pf_wmt->set_req(0, need_pf_true);
2348
2349 Node *store_new_wmt = new StorePNode(need_pf_true,
2350 contended_phi_rawmem, eden_pf_adr,
2351 TypeRawPtr::BOTTOM, new_pf_wmt,
2352 MemNode::unordered);
2353 transform_later(store_new_wmt);
2354
2355 // adding prefetches
2356 pf_phi_abio->init_req( fall_in_path, i_o );
2357
2358 Node *prefetch_adr;
2359 Node *prefetch;
2360 uint step_size = AllocatePrefetchStepSize;
2361 uint distance = 0;
2362
2363 for ( intx i = 0; i < lines; i++ ) {
2364 prefetch_adr = new AddPNode( old_pf_wm, new_pf_wmt,
2365 _igvn.MakeConX(distance) );
2366 transform_later(prefetch_adr);
2367 prefetch = new PrefetchAllocationNode( i_o, prefetch_adr );
2368 transform_later(prefetch);
2369 distance += step_size;
2370 i_o = prefetch;
2371 }
2372 pf_phi_abio->set_req( pf_path, i_o );
2373
2374 pf_region->init_req( fall_in_path, need_pf_false );
2375 pf_region->init_req( pf_path, need_pf_true );
2376
2377 pf_phi_rawmem->init_req( fall_in_path, contended_phi_rawmem );
2378 pf_phi_rawmem->init_req( pf_path, store_new_wmt );
2379
2380 transform_later(pf_region);
2381 transform_later(pf_phi_rawmem);
2382 transform_later(pf_phi_abio);
2383
2384 needgc_false = pf_region;
2385 contended_phi_rawmem = pf_phi_rawmem;
2386 i_o = pf_phi_abio;
2387 } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
2388 // Insert a prefetch instruction for each allocation.
2389 // This code is used to generate 1 prefetch instruction per cache line.
2390
2391 // Generate several prefetch instructions.
2392 uint step_size = AllocatePrefetchStepSize;
2393 uint distance = AllocatePrefetchDistance;
2394
2395 // Next cache address.
2396 Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
2397 _igvn.MakeConX(step_size + distance));
2398 transform_later(cache_adr);
2399 cache_adr = new CastP2XNode(needgc_false, cache_adr);
2400 transform_later(cache_adr);
2401 // Address is aligned to execute prefetch to the beginning of cache line size
2402 // (it is important when BIS instruction is used on SPARC as prefetch).
2403 Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
2404 cache_adr = new AndXNode(cache_adr, mask);
2405 transform_later(cache_adr);
2406 cache_adr = new CastX2PNode(cache_adr);
2407 transform_later(cache_adr);
2408
2409 // Prefetch
2410 Node *prefetch = new PrefetchAllocationNode( contended_phi_rawmem, cache_adr );
2411 prefetch->set_req(0, needgc_false);
2412 transform_later(prefetch);
2413 contended_phi_rawmem = prefetch;
2414 Node *prefetch_adr;
2415 distance = step_size;
2416 for ( intx i = 1; i < lines; i++ ) {
2417 prefetch_adr = new AddPNode( cache_adr, cache_adr,
2418 _igvn.MakeConX(distance) );
2419 transform_later(prefetch_adr);
2420 prefetch = new PrefetchAllocationNode( contended_phi_rawmem, prefetch_adr );
2421 transform_later(prefetch);
2422 distance += step_size;
2423 contended_phi_rawmem = prefetch;
2424 }
2425 } else if( AllocatePrefetchStyle > 0 ) {
2426 // Insert a prefetch for each allocation only on the fast-path
2427 Node *prefetch_adr;
2428 Node *prefetch;
2429 // Generate several prefetch instructions.
2430 uint step_size = AllocatePrefetchStepSize;
2431 uint distance = AllocatePrefetchDistance;
2432 for ( intx i = 0; i < lines; i++ ) {
2433 prefetch_adr = new AddPNode( old_eden_top, new_eden_top,
2434 _igvn.MakeConX(distance) );
2435 transform_later(prefetch_adr);
2436 prefetch = new PrefetchAllocationNode( i_o, prefetch_adr );
2437 // Do not let it float too high, since if eden_top == eden_end,
2438 // both might be null.
2439 if( i == 0 ) { // Set control for first prefetch, next follows it
2440 prefetch->init_req(0, needgc_false);
2441 }
2442 transform_later(prefetch);
2443 distance += step_size;
2444 i_o = prefetch;
2445 }
2446 }
2447 return i_o;
2448 }
2449
2450
2451 void PhaseMacroExpand::expand_allocate(AllocateNode *alloc) {
2452 expand_allocate_common(alloc, NULL,
2453 OptoRuntime::new_instance_Type(),
2454 OptoRuntime::new_instance_Java());
2455 }
2456
2457 void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) {
2458 Node* length = alloc->in(AllocateNode::ALength);
2459 InitializeNode* init = alloc->initialization();
2460 Node* klass_node = alloc->in(AllocateNode::KlassNode);
2461 ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();
2462 address slow_call_address; // Address of slow call
2463 if (init != NULL && init->is_complete_with_arraycopy() &&
2464 k->is_type_array_klass()) {
2465 // Don't zero type array during slow allocation in VM since
2466 // it will be initialized later by arraycopy in compiled code.
2467 slow_call_address = OptoRuntime::new_array_nozero_Java();
2468 } else {
2469 slow_call_address = OptoRuntime::new_array_Java();
2470 }
2471 expand_allocate_common(alloc, length,
2472 OptoRuntime::new_array_Type(),
2473 slow_call_address);
2474 }
2475
2476 //-------------------mark_eliminated_box----------------------------------
2477 //
// During EA, obj may point to several objects, but after a few ideal graph
// transformations (CCP) it may point to only one non-escaping object
// (though still through a phi); the corresponding locks and unlocks will be
// marked for elimination. Later obj could be replaced with a new node (a new
// phi) which does not have escape information. And later, after some graph
// reshaping, other locks and unlocks (which were not marked for elimination
// before) are connected to this new obj (phi) but they still will not be
// marked for elimination since the new obj has no escape information.
// Mark all associated (same box and obj) lock and unlock nodes for
// elimination if some of them are marked already.
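//
// In other words (a rough sketch): if a lock on box B and object O was marked
// for elimination during EA, and a later transformation attaches another
// lock/unlock to the same box B and (possibly re-created) O, that lock is
// marked here as well so the whole set is eliminated consistently.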
2488 void PhaseMacroExpand::mark_eliminated_box(Node* oldbox, Node* obj) {
2489 if (oldbox->as_BoxLock()->is_eliminated())
2490 return; // This BoxLock node was processed already.
2491
2492 // New implementation (EliminateNestedLocks) has separate BoxLock
2493 // node for each locked region so mark all associated locks/unlocks as
2494 // eliminated even if different objects are referenced in one locked region
2495 // (for example, OSR compilation of nested loop inside locked scope).
2496 if (EliminateNestedLocks ||
2497 oldbox->as_BoxLock()->is_simple_lock_region(NULL, obj)) {
2498 // Box is used only in one lock region. Mark this box as eliminated.
2499 _igvn.hash_delete(oldbox);
2500 oldbox->as_BoxLock()->set_eliminated(); // This changes box's hash value
2501 _igvn.hash_insert(oldbox);
2502
2503 for (uint i = 0; i < oldbox->outcnt(); i++) {
2504 Node* u = oldbox->raw_out(i);
2505 if (u->is_AbstractLock() && !u->as_AbstractLock()->is_non_esc_obj()) {
2506 AbstractLockNode* alock = u->as_AbstractLock();
2507 // Check lock's box since box could be referenced by Lock's debug info.
2508 if (alock->box_node() == oldbox) {
2509 // Mark eliminated all related locks and unlocks.
2510 #ifdef ASSERT
2511 alock->log_lock_optimization(C, "eliminate_lock_set_non_esc4");
2512 #endif
2513 alock->set_non_esc_obj();
2514 }
2515 }
2516 }
2517 return;
2518 }
2519
2520 // Create new "eliminated" BoxLock node and use it in monitor debug info
2521 // instead of oldbox for the same object.
2522 BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
2523
2524 // Note: BoxLock node is marked eliminated only here and it is used
2525 // to indicate that all associated lock and unlock nodes are marked
2526 // for elimination.
2527 newbox->set_eliminated();
2528 transform_later(newbox);
2529
2530 // Replace old box node with new box for all users of the same object.
2531 for (uint i = 0; i < oldbox->outcnt();) {
2532 bool next_edge = true;
2533
2534 Node* u = oldbox->raw_out(i);
2535 if (u->is_AbstractLock()) {
2536 AbstractLockNode* alock = u->as_AbstractLock();
2537 if (alock->box_node() == oldbox && alock->obj_node()->eqv_uncast(obj)) {
2538 // Replace Box and mark eliminated all related locks and unlocks.
2539 #ifdef ASSERT
2540 alock->log_lock_optimization(C, "eliminate_lock_set_non_esc5");
2541 #endif
2542 alock->set_non_esc_obj();
2543 _igvn.rehash_node_delayed(alock);
2544 alock->set_box_node(newbox);
2545 next_edge = false;
2546 }
2547 }
2548 if (u->is_FastLock() && u->as_FastLock()->obj_node()->eqv_uncast(obj)) {
2549 FastLockNode* flock = u->as_FastLock();
2550 assert(flock->box_node() == oldbox, "sanity");
2551 _igvn.rehash_node_delayed(flock);
2552 flock->set_box_node(newbox);
2553 next_edge = false;
2554 }
2555
2556 // Replace old box in monitor debug info.
2557 if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
2558 SafePointNode* sfn = u->as_SafePoint();
2559 JVMState* youngest_jvms = sfn->jvms();
2560 int max_depth = youngest_jvms->depth();
2561 for (int depth = 1; depth <= max_depth; depth++) {
2562 JVMState* jvms = youngest_jvms->of_depth(depth);
2563 int num_mon = jvms->nof_monitors();
2564 // Loop over monitors
2565 for (int idx = 0; idx < num_mon; idx++) {
2566 Node* obj_node = sfn->monitor_obj(jvms, idx);
2567 Node* box_node = sfn->monitor_box(jvms, idx);
2568 if (box_node == oldbox && obj_node->eqv_uncast(obj)) {
2569 int j = jvms->monitor_box_offset(idx);
2570 _igvn.replace_input_of(u, j, newbox);
2571 next_edge = false;
2572 }
2573 }
2574 }
2575 }
2576 if (next_edge) i++;
2577 }
2578 }
2579
2580 //-----------------------mark_eliminated_locking_nodes-----------------------
2581 void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
2582 if (EliminateNestedLocks) {
2583 if (alock->is_nested()) {
2584 assert(alock->box_node()->as_BoxLock()->is_eliminated(), "sanity");
2585 return;
2586 } else if (!alock->is_non_esc_obj()) { // Not eliminated or coarsened
2587 // Only Lock node has JVMState needed here.
      // Not that the preceding claim is documented anywhere else.
2589 if (alock->jvms() != NULL) {
2590 if (alock->as_Lock()->is_nested_lock_region()) {
2591 // Mark eliminated related nested locks and unlocks.
2592 Node* obj = alock->obj_node();
2593 BoxLockNode* box_node = alock->box_node()->as_BoxLock();
2594 assert(!box_node->is_eliminated(), "should not be marked yet");
2595 // Note: BoxLock node is marked eliminated only here
2596 // and it is used to indicate that all associated lock
2597 // and unlock nodes are marked for elimination.
2598 box_node->set_eliminated(); // Box's hash is always NO_HASH here
2599 for (uint i = 0; i < box_node->outcnt(); i++) {
2600 Node* u = box_node->raw_out(i);
2601 if (u->is_AbstractLock()) {
2602 alock = u->as_AbstractLock();
2603 if (alock->box_node() == box_node) {
2604 // Verify that this Box is referenced only by related locks.
2605 assert(alock->obj_node()->eqv_uncast(obj), "");
2606 // Mark all related locks and unlocks.
2607 #ifdef ASSERT
2608 alock->log_lock_optimization(C, "eliminate_lock_set_nested");
2609 #endif
2610 alock->set_nested();
2611 }
2612 }
2613 }
2614 } else {
2615 #ifdef ASSERT
2616 alock->log_lock_optimization(C, "eliminate_lock_NOT_nested_lock_region");
2617 if (C->log() != NULL)
2618 alock->as_Lock()->is_nested_lock_region(C); // rerun for debugging output
2619 #endif
2620 }
2621 }
2622 return;
2623 }
2624 // Process locks for non escaping object
2625 assert(alock->is_non_esc_obj(), "");
2626 } // EliminateNestedLocks
2627
2628 if (alock->is_non_esc_obj()) { // Lock is used for non escaping object
2629 // Look for all locks of this object and mark them and
2630 // corresponding BoxLock nodes as eliminated.
2631 Node* obj = alock->obj_node();
2632 for (uint j = 0; j < obj->outcnt(); j++) {
2633 Node* o = obj->raw_out(j);
2634 if (o->is_AbstractLock() &&
2635 o->as_AbstractLock()->obj_node()->eqv_uncast(obj)) {
2636 alock = o->as_AbstractLock();
2637 Node* box = alock->box_node();
2638 // Replace old box node with new eliminated box for all users
2639 // of the same object and mark related locks as eliminated.
2640 mark_eliminated_box(box, obj);
2641 }
2642 }
2643 }
2644 }
2645
// Once we have determined that this lock/unlock can be eliminated, we simply
// eliminate the node without expanding it.
2648 //
// Note: The membars associated with the lock/unlock are currently not
2650 // eliminated. This should be investigated as a future enhancement.
2651 //
2652 bool PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) {
2653
2654 if (!alock->is_eliminated()) {
2655 return false;
2656 }
2657 #ifdef ASSERT
2658 if (!alock->is_coarsened()) {
2659 // Check that new "eliminated" BoxLock node is created.
2660 BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
2661 assert(oldbox->is_eliminated(), "should be done already");
2662 }
2663 #endif
2664
2665 alock->log_lock_optimization(C, "eliminate_lock");
2666
2667 #ifndef PRODUCT
2668 if (PrintEliminateLocks) {
2669 if (alock->is_Lock()) {
2670 tty->print_cr("++++ Eliminated: %d Lock", alock->_idx);
2671 } else {
2672 tty->print_cr("++++ Eliminated: %d Unlock", alock->_idx);
2673 }
2674 }
2675 #endif
2676
2677 Node* mem = alock->in(TypeFunc::Memory);
2678 Node* ctrl = alock->in(TypeFunc::Control);
2679 guarantee(ctrl != NULL, "missing control projection, cannot replace_node() with NULL");
2680
2681 extract_call_projections(alock);
2682 // There are 2 projections from the lock. The lock node will
2683 // be deleted when its last use is subsumed below.
2684 assert(alock->outcnt() == 2 &&
2685 _fallthroughproj != NULL &&
2686 _memproj_fallthrough != NULL,
2687 "Unexpected projections from Lock/Unlock");
2688
2689 Node* fallthroughproj = _fallthroughproj;
2690 Node* memproj_fallthrough = _memproj_fallthrough;
2691
2692 // The memory projection from a lock/unlock is RawMem
2693 // The input to a Lock is merged memory, so extract its RawMem input
2694 // (unless the MergeMem has been optimized away.)
2695 if (alock->is_Lock()) {
    // Search for the MemBarAcquireLock node and delete it also.
2697 MemBarNode* membar = fallthroughproj->unique_ctrl_out()->as_MemBar();
2698 assert(membar != NULL && membar->Opcode() == Op_MemBarAcquireLock, "");
2699 Node* ctrlproj = membar->proj_out(TypeFunc::Control);
2700 Node* memproj = membar->proj_out(TypeFunc::Memory);
2701 _igvn.replace_node(ctrlproj, fallthroughproj);
2702 _igvn.replace_node(memproj, memproj_fallthrough);
2703
2704 // Delete FastLock node also if this Lock node is unique user
2705 // (a loop peeling may clone a Lock node).
2706 Node* flock = alock->as_Lock()->fastlock_node();
2707 if (flock->outcnt() == 1) {
2708 assert(flock->unique_out() == alock, "sanity");
2709 _igvn.replace_node(flock, top());
2710 }
2711 }
2712
  // Search for the MemBarReleaseLock node and delete it also.
2714 if (alock->is_Unlock() && ctrl->is_Proj() && ctrl->in(0)->is_MemBar()) {
2715 MemBarNode* membar = ctrl->in(0)->as_MemBar();
2716 assert(membar->Opcode() == Op_MemBarReleaseLock &&
2717 mem->is_Proj() && membar == mem->in(0), "");
2718 _igvn.replace_node(fallthroughproj, ctrl);
2719 _igvn.replace_node(memproj_fallthrough, mem);
2720 fallthroughproj = ctrl;
2721 memproj_fallthrough = mem;
2722 ctrl = membar->in(TypeFunc::Control);
2723 mem = membar->in(TypeFunc::Memory);
2724 }
2725
2726 _igvn.replace_node(fallthroughproj, ctrl);
2727 _igvn.replace_node(memproj_fallthrough, mem);
2728 return true;
2729 }
2730
2731
2732 //------------------------------expand_lock_node----------------------
2733 void PhaseMacroExpand::expand_lock_node(LockNode *lock) {
2734
2735 Node* ctrl = lock->in(TypeFunc::Control);
2736 Node* mem = lock->in(TypeFunc::Memory);
2737 Node* obj = lock->obj_node();
2738 Node* box = lock->box_node();
2739 Node* flock = lock->fastlock_node();
2740
2741 assert(!box->as_BoxLock()->is_eliminated(), "sanity");
2742
2743 // Make the merge point
2744 Node *region;
2745 Node *mem_phi;
2746 Node *slow_path;
2747
2748 if (UseOptoBiasInlining) {
2749 /*
2750 * See the full description in MacroAssembler::biased_locking_enter().
2751 *
2752 * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) {
2753 * // The object is biased.
2754 * proto_node = klass->prototype_header;
2755 * o_node = thread | proto_node;
2756 * x_node = o_node ^ mark_word;
2757 * if( (x_node & ~age_mask) == 0 ) { // Biased to the current thread ?
2758 * // Done.
2759 * } else {
2760 * if( (x_node & biased_lock_mask) != 0 ) {
2761 * // The klass's prototype header is no longer biased.
2762 * cas(&mark_word, mark_word, proto_node)
2763 * goto cas_lock;
2764 * } else {
2765 * // The klass's prototype header is still biased.
2766 * if( (x_node & epoch_mask) != 0 ) { // Expired epoch?
2767 * old = mark_word;
2768 * new = o_node;
2769 * } else {
2770 * // Different thread or anonymous biased.
2771 * old = mark_word & (epoch_mask | age_mask | biased_lock_mask);
2772 * new = thread | old;
2773 * }
2774 * // Try to rebias.
2775 * if( cas(&mark_word, old, new) == 0 ) {
2776 * // Done.
2777 * } else {
2778 * goto slow_path; // Failed.
2779 * }
2780 * }
2781 * }
2782 * } else {
2783 * // The object is not biased.
2784 * cas_lock:
2785 * if( FastLock(obj) == 0 ) {
2786 * // Done.
2787 * } else {
2788 * slow_path:
2789 * OptoRuntime::complete_monitor_locking_Java(obj);
2790 * }
2791 * }
2792 */
2793
2794 region = new RegionNode(5);
2795 // create a Phi for the memory state
2796 mem_phi = new PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
2797
2798 Node* fast_lock_region = new RegionNode(3);
2799 Node* fast_lock_mem_phi = new PhiNode( fast_lock_region, Type::MEMORY, TypeRawPtr::BOTTOM);
2800
2801 // First, check mark word for the biased lock pattern.
2802 Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
2803
2804 // Get fast path - mark word has the biased lock pattern.
2805 ctrl = opt_bits_test(ctrl, fast_lock_region, 1, mark_node,
2806 markWord::biased_lock_mask_in_place,
2807 markWord::biased_lock_pattern, true);
2808 // fast_lock_region->in(1) is set to slow path.
2809 fast_lock_mem_phi->init_req(1, mem);
2810
2811 // Now check that the lock is biased to the current thread and has
2812 // the same epoch and bias as Klass::_prototype_header.
2813
2814 // Special-case a fresh allocation to avoid building nodes:
2815 Node* klass_node = AllocateNode::Ideal_klass(obj, &_igvn);
2816 if (klass_node == NULL) {
2817 Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
2818 klass_node = transform_later(LoadKlassNode::make(_igvn, NULL, mem, k_adr, _igvn.type(k_adr)->is_ptr()));
2819 #ifdef _LP64
2820 if (UseCompressedClassPointers && klass_node->is_DecodeNKlass()) {
2821 assert(klass_node->in(1)->Opcode() == Op_LoadNKlass, "sanity");
2822 klass_node->in(1)->init_req(0, ctrl);
2823 } else
2824 #endif
2825 klass_node->init_req(0, ctrl);
2826 }
2827 Node *proto_node = make_load(ctrl, mem, klass_node, in_bytes(Klass::prototype_header_offset()), TypeX_X, TypeX_X->basic_type());
2828
2829 Node* thread = transform_later(new ThreadLocalNode());
2830 Node* cast_thread = transform_later(new CastP2XNode(ctrl, thread));
2831 Node* o_node = transform_later(new OrXNode(cast_thread, proto_node));
2832 Node* x_node = transform_later(new XorXNode(o_node, mark_node));
2833
2834 // Get slow path - mark word does NOT match the value.
2835 STATIC_ASSERT(markWord::age_mask_in_place <= INT_MAX);
2836 Node* not_biased_ctrl = opt_bits_test(ctrl, region, 3, x_node,
2837 (~(int)markWord::age_mask_in_place), 0);
2838 // region->in(3) is set to fast path - the object is biased to the current thread.
2839 mem_phi->init_req(3, mem);
2840
2841
2842 // Mark word does NOT match the value (thread | Klass::_prototype_header).
2843
2844
2845 // First, check biased pattern.
2846 // Get fast path - _prototype_header has the same biased lock pattern.
2847 ctrl = opt_bits_test(not_biased_ctrl, fast_lock_region, 2, x_node,
2848 markWord::biased_lock_mask_in_place, 0, true);
2849
2850 not_biased_ctrl = fast_lock_region->in(2); // Slow path
2851 // fast_lock_region->in(2) - the prototype header is no longer biased
2852 // and we have to revoke the bias on this object.
2853 // We are going to try to reset the mark of this object to the prototype
2854 // value and fall through to the CAS-based locking scheme.
2855 Node* adr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
2856 Node* cas = new StoreXConditionalNode(not_biased_ctrl, mem, adr,
2857 proto_node, mark_node);
2858 transform_later(cas);
2859 Node* proj = transform_later(new SCMemProjNode(cas));
2860 fast_lock_mem_phi->init_req(2, proj);
2861
2862
2863 // Second, check epoch bits.
2864 Node* rebiased_region = new RegionNode(3);
2865 Node* old_phi = new PhiNode( rebiased_region, TypeX_X);
2866 Node* new_phi = new PhiNode( rebiased_region, TypeX_X);
2867
2868 // Get slow path - mark word does NOT match epoch bits.
2869 Node* epoch_ctrl = opt_bits_test(ctrl, rebiased_region, 1, x_node,
2870 markWord::epoch_mask_in_place, 0);
    // The epoch of the current bias is not valid; attempt to rebias the object
2872 // toward the current thread.
2873 rebiased_region->init_req(2, epoch_ctrl);
2874 old_phi->init_req(2, mark_node);
2875 new_phi->init_req(2, o_node);
2876
2877 // rebiased_region->in(1) is set to fast path.
2878 // The epoch of the current bias is still valid but we know
2879 // nothing about the owner; it might be set or it might be clear.
2880 Node* cmask = MakeConX(markWord::biased_lock_mask_in_place |
2881 markWord::age_mask_in_place |
2882 markWord::epoch_mask_in_place);
2883 Node* old = transform_later(new AndXNode(mark_node, cmask));
2884 cast_thread = transform_later(new CastP2XNode(ctrl, thread));
2885 Node* new_mark = transform_later(new OrXNode(cast_thread, old));
2886 old_phi->init_req(1, old);
2887 new_phi->init_req(1, new_mark);
2888
2889 transform_later(rebiased_region);
2890 transform_later(old_phi);
2891 transform_later(new_phi);
2892
2893 // Try to acquire the bias of the object using an atomic operation.
2894 // If this fails we will go in to the runtime to revoke the object's bias.
2895 cas = new StoreXConditionalNode(rebiased_region, mem, adr, new_phi, old_phi);
2896 transform_later(cas);
2897 proj = transform_later(new SCMemProjNode(cas));
2898
2899 // Get slow path - Failed to CAS.
2900 not_biased_ctrl = opt_bits_test(rebiased_region, region, 4, cas, 0, 0);
2901 mem_phi->init_req(4, proj);
2902 // region->in(4) is set to fast path - the object is rebiased to the current thread.
2903
2904 // Failed to CAS.
2905 slow_path = new RegionNode(3);
2906 Node *slow_mem = new PhiNode( slow_path, Type::MEMORY, TypeRawPtr::BOTTOM);
2907
2908 slow_path->init_req(1, not_biased_ctrl); // Capture slow-control
2909 slow_mem->init_req(1, proj);
2910
2911 // Call CAS-based locking scheme (FastLock node).
2912
2913 transform_later(fast_lock_region);
2914 transform_later(fast_lock_mem_phi);
2915
2916 // Get slow path - FastLock failed to lock the object.
2917 ctrl = opt_bits_test(fast_lock_region, region, 2, flock, 0, 0);
2918 mem_phi->init_req(2, fast_lock_mem_phi);
2919 // region->in(2) is set to fast path - the object is locked to the current thread.
2920
2921 slow_path->init_req(2, ctrl); // Capture slow-control
2922 slow_mem->init_req(2, fast_lock_mem_phi);
2923
2924 transform_later(slow_path);
2925 transform_later(slow_mem);
2926 // Reset lock's memory edge.
2927 lock->set_req(TypeFunc::Memory, slow_mem);
2928
2929 } else {
2930 region = new RegionNode(3);
2931 // create a Phi for the memory state
2932 mem_phi = new PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
2933
2934 // Optimize test; set region slot 2
2935 slow_path = opt_bits_test(ctrl, region, 2, flock, 0, 0);
2936 mem_phi->init_req(2, mem);
2937 }
2938
2939 // Make slow path call
2940 CallNode *call = make_slow_call((CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(),
2941 OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path,
2942 obj, box, NULL);
2943
2944 extract_call_projections(call);
2945
2946 // Slow path can only throw asynchronous exceptions, which are always
2947 // de-opted. So the compiler thinks the slow-call can never throw an
2948 // exception. If it DOES throw an exception we would need the debug
2949 // info removed first (since if it throws there is no monitor).
2950 assert ( _ioproj_fallthrough == NULL && _ioproj_catchall == NULL &&
2951 _memproj_catchall == NULL && _catchallcatchproj == NULL, "Unexpected projection from Lock");
2952
2953 // Capture slow path
2954 // disconnect fall-through projection from call and create a new one
2955 // hook up users of fall-through projection to region
2956 Node *slow_ctrl = _fallthroughproj->clone();
2957 transform_later(slow_ctrl);
2958 _igvn.hash_delete(_fallthroughproj);
2959 _fallthroughproj->disconnect_inputs(NULL, C);
2960 region->init_req(1, slow_ctrl);
2961 // region inputs are now complete
2962 transform_later(region);
2963 _igvn.replace_node(_fallthroughproj, region);
2964
2965 Node *memproj = transform_later(new ProjNode(call, TypeFunc::Memory));
2966 mem_phi->init_req(1, memproj );
2967 transform_later(mem_phi);
2968 _igvn.replace_node(_memproj_fallthrough, mem_phi);
2969 }
2970
2971 //------------------------------expand_unlock_node----------------------
2972 void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) {
2973
2974 Node* ctrl = unlock->in(TypeFunc::Control);
2975 Node* mem = unlock->in(TypeFunc::Memory);
2976 Node* obj = unlock->obj_node();
2977 Node* box = unlock->box_node();
2978
2979 assert(!box->as_BoxLock()->is_eliminated(), "sanity");
2980
2981 // No need for a null check on unlock
2982
2983 // Make the merge point
2984 Node *region;
2985 Node *mem_phi;
2986
2987 if (UseOptoBiasInlining) {
2988 // Check for biased locking unlock case, which is a no-op.
2989 // See the full description in MacroAssembler::biased_locking_exit().
2990 region = new RegionNode(4);
2991 // create a Phi for the memory state
2992 mem_phi = new PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
2993 mem_phi->init_req(3, mem);
2994
2995 Node* mark_node = make_load(ctrl, mem, obj, oopDesc::mark_offset_in_bytes(), TypeX_X, TypeX_X->basic_type());
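// The test below sends control to region->in(3) when the mark word still has
// the biased pattern: unlocking a biased object is a no-op and the incoming
// memory state is kept (mem_phi slot 3). The returned ctrl continues when the
// object is not biased.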
2996 ctrl = opt_bits_test(ctrl, region, 3, mark_node,
2997 markWord::biased_lock_mask_in_place,
2998 markWord::biased_lock_pattern);
2999 } else {
3000 region = new RegionNode(3);
3001 // create a Phi for the memory state
3002 mem_phi = new PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
3003 }
3004
3005 FastUnlockNode *funlock = new FastUnlockNode( ctrl, obj, box );
3006 funlock = transform_later( funlock )->as_FastUnlock();
3007 // Optimize test; set region slot 2
3008 Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0);
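// The runtime unlock routine expects the current thread as its last argument.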
3009 Node *thread = transform_later(new ThreadLocalNode());
3010
3011 CallNode *call = make_slow_call((CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(),
3012 CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C),
3013 "complete_monitor_unlocking_C", slow_path, obj, box, thread);
3014
3015 extract_call_projections(call);
3016
3017 assert ( _ioproj_fallthrough == NULL && _ioproj_catchall == NULL &&
3018 _memproj_catchall == NULL && _catchallcatchproj == NULL, "Unexpected projection from Unlock");
3019
3020 // No exceptions for unlocking
3021 // Capture slow path
3022 // disconnect fall-through projection from call and create a new one
3023 // hook up users of fall-through projection to region
3024 Node *slow_ctrl = _fallthroughproj->clone();
3025 transform_later(slow_ctrl);
3026 _igvn.hash_delete(_fallthroughproj);
3027 _fallthroughproj->disconnect_inputs(NULL, C);
3028 region->init_req(1, slow_ctrl);
3029 // region inputs are now complete
3030 transform_later(region);
3031 _igvn.replace_node(_fallthroughproj, region);
3032
3033 Node *memproj = transform_later(new ProjNode(call, TypeFunc::Memory) );
3034 mem_phi->init_req(1, memproj );
3035 mem_phi->init_req(2, mem);
3036 transform_later(mem_phi);
3037 _igvn.replace_node(_memproj_fallthrough, mem_phi);
3038 }
3039
3040 void PhaseMacroExpand::expand_subtypecheck_node(SubTypeCheckNode *check) {
3041 assert(check->in(SubTypeCheckNode::Control) == NULL, "should be pinned");
3042 Node* bol = check->unique_out();
3043 Node* obj_or_subklass = check->in(SubTypeCheckNode::ObjOrSubKlass);
3044 Node* superklass = check->in(SubTypeCheckNode::SuperKlass);
3045 assert(bol->is_Bool() && bol->as_Bool()->_test._test == BoolTest::ne, "unexpected bool node");
3046
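// Expand the check at every If that uses this bool: emit the full subtype check
// and rewire the If's projections to the resulting success and failure controls.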
3047 for (DUIterator_Last imin, i = bol->last_outs(imin); i >= imin; --i) {
3048 Node* iff = bol->last_out(i);
3049 assert(iff->is_If(), "where's the if?");
3050
3051 if (iff->in(0)->is_top()) {
3052 _igvn.replace_input_of(iff, 1, C->top());
3053 continue;
3054 }
3055
3056 Node* iftrue = iff->as_If()->proj_out(1);
3057 Node* iffalse = iff->as_If()->proj_out(0);
3058 Node* ctrl = iff->in(0);
3059
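// Use the input directly if it is already a klass pointer; otherwise load the
// klass from the object header.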
3060 Node* subklass = NULL;
3061 if (_igvn.type(obj_or_subklass)->isa_klassptr()) {
3062 subklass = obj_or_subklass;
3063 } else {
3064 Node* k_adr = basic_plus_adr(obj_or_subklass, oopDesc::klass_offset_in_bytes());
3065 subklass = _igvn.transform(LoadKlassNode::make(_igvn, NULL, C->immutable_memory(), k_adr, TypeInstPtr::KLASS));
3066 }
3067
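// gen_subtype_check returns the control on which the check fails and updates
// ctrl to the success path. Since the bool test is ne, the If's true projection
// is the not-a-subtype path and the false projection is the success path.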
3068 Node* not_subtype_ctrl = Phase::gen_subtype_check(subklass, superklass, &ctrl, NULL, _igvn);
3069
3070 _igvn.replace_input_of(iff, 0, C->top());
3071 _igvn.replace_node(iftrue, not_subtype_ctrl);
3072 _igvn.replace_node(iffalse, ctrl);
3073 }
3074 _igvn.replace_node(check, C->top());
3075 }
3076
3077 //---------------------------eliminate_macro_nodes----------------------
3078 // Eliminate scalar replaced allocations and associated locks.
3079 void PhaseMacroExpand::eliminate_macro_nodes() {
3080 if (C->macro_count() == 0)
3081 return;
3082
3083 // First, attempt to eliminate locks
3084 int cnt = C->macro_count();
3085 for (int i=0; i < cnt; i++) {
3086 Node *n = C->macro_node(i);
3087 if (n->is_AbstractLock()) { // Lock and Unlock nodes
3088 // Before elimination mark all associated (same box and obj)
3089 // lock and unlock nodes.
3090 mark_eliminated_locking_nodes(n->as_AbstractLock());
3091 }
3092 }
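// Iterate until no further locking nodes can be eliminated; elimination removes
// nodes from the macro list, so re-scan until a fixed point is reached.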
3093 bool progress = true;
3094 while (progress) {
3095 progress = false;
3096 for (int i = C->macro_count(); i > 0; i--) {
3097 Node * n = C->macro_node(i-1);
3098 bool success = false;
3099 debug_only(int old_macro_count = C->macro_count(););
3100 if (n->is_AbstractLock()) {
3101 success = eliminate_locking_node(n->as_AbstractLock());
3102 }
3103 assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
3104 progress = progress || success;
3105 }
3106 }
3107 // Next, attempt to eliminate allocations and boxing nodes
3108 _has_locks = false;
3109 progress = true;
3110 while (progress) {
3111 progress = false;
3112 for (int i = C->macro_count(); i > 0; i--) {
3113 Node * n = C->macro_node(i-1);
3114 bool success = false;
3115 debug_only(int old_macro_count = C->macro_count(););
3116 switch (n->class_id()) {
3117 case Node::Class_Allocate:
3118 case Node::Class_AllocateArray:
3119 success = eliminate_allocate_node(n->as_Allocate());
3120 break;
3121 case Node::Class_CallStaticJava:
3122 success = eliminate_boxing_node(n->as_CallStaticJava());
3123 break;
3124 case Node::Class_Lock:
3125 case Node::Class_Unlock:
3126 assert(!n->as_AbstractLock()->is_eliminated(), "sanity");
3127 _has_locks = true;
3128 break;
3129 case Node::Class_ArrayCopy:
3130 break;
3131 case Node::Class_OuterStripMinedLoop:
3132 break;
3133 case Node::Class_SubTypeCheck:
3134 break;
3135 default:
3136 assert(n->Opcode() == Op_LoopLimit ||
3137 n->Opcode() == Op_Opaque1 ||
3138 n->Opcode() == Op_Opaque2 ||
3139 n->Opcode() == Op_Opaque3 ||
3140 BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(n),
3141 "unknown node type in macro list");
3142 }
3143 assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
3144 progress = progress || success;
3145 }
3146 }
3147 }
3148
3149 //------------------------------expand_macro_nodes----------------------
3150 // Returns true if a failure occurred.
3151 bool PhaseMacroExpand::expand_macro_nodes() {
3152 // Last attempt to eliminate macro nodes.
3153 eliminate_macro_nodes();
3154
3155 // Eliminate Opaque and LoopLimit nodes. Do it after all loop optimizations.
3156 bool progress = true;
3157 while (progress) {
3158 progress = false;
3159 for (int i = C->macro_count(); i > 0; i--) {
3160 Node* n = C->macro_node(i-1);
3161 bool success = false;
3162 debug_only(int old_macro_count = C->macro_count(););
3163 if (n->Opcode() == Op_LoopLimit) {
3164 // Remove it from macro list and put on IGVN worklist to optimize.
3165 C->remove_macro_node(n);
3166 _igvn._worklist.push(n);
3167 success = true;
3168 } else if (n->Opcode() == Op_CallStaticJava) {
3169 // Remove it from macro list and put on IGVN worklist to optimize.
3170 C->remove_macro_node(n);
3171 _igvn._worklist.push(n);
3172 success = true;
3173 } else if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
3174 _igvn.replace_node(n, n->in(1));
3175 success = true;
3176 #if INCLUDE_RTM_OPT
3177 } else if ((n->Opcode() == Op_Opaque3) && ((Opaque3Node*)n)->rtm_opt()) {
3178 assert(C->profile_rtm(), "should be used only in rtm deoptimization code");
3179 assert((n->outcnt() == 1) && n->unique_out()->is_Cmp(), "");
3180 Node* cmp = n->unique_out();
3181 #ifdef ASSERT
3182 // Validate graph.
3183 assert((cmp->outcnt() == 1) && cmp->unique_out()->is_Bool(), "");
3184 BoolNode* bol = cmp->unique_out()->as_Bool();
3185 assert((bol->outcnt() == 1) && bol->unique_out()->is_If() &&
3186 (bol->_test._test == BoolTest::ne), "");
3187 IfNode* ifn = bol->unique_out()->as_If();
3188 assert((ifn->outcnt() == 2) &&
3189 ifn->proj_out(1)->is_uncommon_trap_proj(Deoptimization::Reason_rtm_state_change) != NULL, "");
3190 #endif
3191 Node* repl = n->in(1);
3192 if (!_has_locks) {
3193 // Remove the RTM state check if there are no locks in the code:
3194 // replace the node with the other compare input so the Cmp compares a value with itself and folds.
3195 repl = (cmp->in(1) == n) ? cmp->in(2) : cmp->in(1);
3196 }
3197 _igvn.replace_node(n, repl);
3198 success = true;
3199 #endif
3200 } else if (n->Opcode() == Op_OuterStripMinedLoop) {
3201 n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn);
3202 C->remove_macro_node(n);
3203 success = true;
3204 }
3205 assert(!success || (C->macro_count() == (old_macro_count - 1)), "elimination must have deleted one node from macro list");
3206 progress = progress || success;
3207 }
3208 }
3209
3210 // Clean up the graph so we're less likely to hit the maximum node
3211 // limit
3212 _igvn.set_delay_transform(false);
3213 _igvn.optimize();
3214 if (C->failing()) return true;
3215 _igvn.set_delay_transform(true);
3216
3217
3218 // Because we run IGVN after each expansion, some macro nodes may go
3219 // dead and be removed from the list as we iterate over it. Move
3220 // Allocate nodes (processed in a second pass) at the beginning of
3221 // the list and then iterate from the last element of the list until
3222 // an Allocate node is seen. This is robust to random deletion in
3223 // the list due to nodes going dead.
3224 C->sort_macro_nodes();
3225
3226 // Expand all non-Allocate macro nodes first (locks, unlocks, arraycopies, subtype checks).
3227 // For ReduceBulkZeroing, all arraycopy nodes must be processed
3228 // before the allocate nodes are expanded.
3229 while (C->macro_count() > 0) {
3230 int macro_count = C->macro_count();
3231 Node * n = C->macro_node(macro_count-1);
3232 assert(n->is_macro(), "only macro nodes expected here");
3233 if (_igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) {
3234 // node is unreachable, so don't try to expand it
3235 C->remove_macro_node(n);
3236 continue;
3237 }
3238 if (n->is_Allocate()) {
3239 break;
3240 }
3241 // Make sure expansion will not cause node limit to be exceeded.
3242 // Worst case is a macro node gets expanded into about 200 nodes.
3243 // Allow 50% more for optimization.
3244 if (C->check_node_count(300, "out of nodes before macro expansion")) {
3245 return true;
3246 }
3247
3248 debug_only(int old_macro_count = C->macro_count(););
3249 switch (n->class_id()) {
3250 case Node::Class_Lock:
3251 expand_lock_node(n->as_Lock());
3252 assert(C->macro_count() == (old_macro_count - 1), "expansion must have deleted one node from macro list");
3253 break;
3254 case Node::Class_Unlock:
3255 expand_unlock_node(n->as_Unlock());
3256 assert(C->macro_count() == (old_macro_count - 1), "expansion must have deleted one node from macro list");
3257 break;
3258 case Node::Class_ArrayCopy:
3259 expand_arraycopy_node(n->as_ArrayCopy());
3260 assert(C->macro_count() == (old_macro_count - 1), "expansion must have deleted one node from macro list");
3261 break;
3262 case Node::Class_SubTypeCheck:
3263 expand_subtypecheck_node(n->as_SubTypeCheck());
3264 assert(C->macro_count() == (old_macro_count - 1), "expansion must have deleted one node from macro list");
3265 break;
3266 default:
3267 assert(false, "unknown node type in macro list");
3268 }
3269 assert(C->macro_count() < macro_count, "must have deleted a node from macro list");
3270 if (C->failing()) return true;
3271
3272 // Clean up the graph so we're less likely to hit the maximum node
3273 // limit
3274 _igvn.set_delay_transform(false);
3275 _igvn.optimize();
3276 if (C->failing()) return true;
3277 _igvn.set_delay_transform(true);
3278 }
3279
3280 for (int i = C->macro_count(); i > 0; i--) {
3281 Node * n = C->macro_node(i-1);
3282 assert(n->is_macro(), "only macro nodes expected here");
3283
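// Only Allocate and AllocateArray nodes remain on the macro list at this point;
// estimate the stack space each would need if it were stack allocated.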
3284 switch (n->class_id()) {
3285 case Node::Class_Allocate:
3286 case Node::Class_AllocateArray:
3287 estimate_stack_allocation_size(n->as_Allocate());
3288 break;
3289 default:
3290 assert(false, "unknown node type in macro list");
3291 }
3292 }
3293
3294 // Check whether the stack allocation size is too large before macro expansion
3295 // so that required stack allocations can be rejected.
3296 if (!stack_allocation_location_representable(C->fixed_slots() + C->stack_allocated_slots())) {
3297 C->set_fail_stack_allocation_with_references(true);
3298 }
3299
3300 // All nodes except Allocate nodes are expanded now. There could be
3301 // new optimization opportunities (such as folding a newly created
3302 // load from a just-allocated object). Run IGVN.
3303
3304 // expand "macro" nodes
3305 // nodes are removed from the macro list as they are processed
3306 while (C->macro_count() > 0) {
3307 int macro_count = C->macro_count();
3308 Node * n = C->macro_node(macro_count-1);
3309 assert(n->is_macro(), "only macro nodes expected here");
3310 if (_igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) {
3311 // node is unreachable, so don't try to expand it
3312 C->remove_macro_node(n);
3313 continue;
3314 }
3315 // Make sure expansion will not cause node limit to be exceeded.
3316 // Worst case is a macro node gets expanded into about 200 nodes.
3317 // Allow 50% more for optimization.
3318 if (C->check_node_count(300, "out of nodes before macro expansion")) {
3319 return true;
3320 }
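// Try stack allocation first; if the node cannot be stack allocated, expand it
// into the regular heap allocation.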
3321 switch (n->class_id()) {
3322 case Node::Class_Allocate:
3323 if (!stack_allocation(n->as_Allocate())) {
3324 expand_allocate(n->as_Allocate());
3325 }
3326 break;
3327 case Node::Class_AllocateArray:
3328 if (!stack_allocation(n->as_AllocateArray())) {
3329 expand_allocate_array(n->as_AllocateArray());
3330 }
3331 break;
3332 default:
3333 assert(false, "unknown node type in macro list");
3334 }
3335 assert(C->macro_count() < macro_count, "must have deleted a node from macro list");
3336 if (C->failing()) return true;
3337
3338 // Clean up the graph so we're less likely to hit the maximum node
3339 // limit
3340 _igvn.set_delay_transform(false);
3341 _igvn.optimize();
3342 if (C->failing()) return true;
3343 _igvn.set_delay_transform(true);
3344 }
3345
3346 _igvn.set_delay_transform(false);
3347 return false;
3348 }