1 Star 0 Fork 82

阿翔与山海经/电酱jdk8

forked from misaka00251/openjdk-1.8.0 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
8143925-enhancing-CounterMode.crypt-for-AESCrypt.patch 157.97 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938
From 02b097417275acaad294d71a852c2def2222be25 Mon Sep 17 00:00:00 2001
From: kuenking111 <[email protected]>
Date: Sat, 3 Sep 2022 14:17:50 +0000
Subject: [PATCH 1/6] 8143925-enhancing-CounterMode.crypt-for-AESCrypt
---
.../src/cpu/aarch64/vm/assembler_aarch64.hpp | 35 +-
.../cpu/aarch64/vm/macroAssembler_aarch64.hpp | 17 +
.../aarch64/vm/macroAssembler_aarch64_aes.cpp | 685 ++++++++++++++++++
.../cpu/aarch64/vm/stubGenerator_aarch64.cpp | 324 ++++++++-
.../cpu/aarch64/vm/stubRoutines_aarch64.hpp | 2 +-
.../src/cpu/aarch64/vm/vm_version_aarch64.cpp | 13 +-
hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp | 5 +
hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp | 5 +
hotspot/src/cpu/x86/vm/assembler_x86.cpp | 74 +-
hotspot/src/cpu/x86/vm/assembler_x86.hpp | 12 +
.../src/cpu/x86/vm/stubGenerator_x86_32.cpp | 344 +++++++++
.../src/cpu/x86/vm/stubGenerator_x86_64.cpp | 340 ++++++++-
hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp | 1 +
hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp | 5 +
.../src/cpu/x86/vm/stubRoutines_x86_32.hpp | 2 +-
.../src/cpu/x86/vm/stubRoutines_x86_64.hpp | 2 +-
hotspot/src/cpu/x86/vm/vm_version_x86.cpp | 36 +
hotspot/src/share/vm/classfile/vmSymbols.hpp | 4 +
hotspot/src/share/vm/opto/escape.cpp | 1 +
hotspot/src/share/vm/opto/library_call.cpp | 174 +++++
hotspot/src/share/vm/opto/runtime.cpp | 29 +
hotspot/src/share/vm/opto/runtime.hpp | 1 +
hotspot/src/share/vm/runtime/globals.hpp | 3 +
hotspot/src/share/vm/runtime/stubRoutines.cpp | 1 +
hotspot/src/share/vm/runtime/stubRoutines.hpp | 2 +
hotspot/src/share/vm/runtime/vmStructs.cpp | 1 +
.../test/compiler/7184394/TestAESBase.java | 4 +-
.../test/compiler/7184394/TestAESMain.java | 7 +
.../com/sun/crypto/provider/CounterMode.java | 11 +-
.../classes/com/sun/crypto/provider/GCTR.java | 89 +--
.../com/sun/crypto/provider/GHASH.java | 20 +-
.../sun/security/ssl/SSLSocketImpl.java | 14 +-
.../security/ssl/SSLSocketInputRecord.java | 215 +++---
.../sun/security/ssl/SSLTransport.java | 4 +
.../bench/javax/crypto/full/AESGCMBench.java | 128 ++++
.../javax/crypto/full/AESGCMByteBuffer.java | 163 +++++
.../bench/javax/crypto/full/CryptoBase.java | 102 +++
.../bench/javax/crypto/small/AESGCMBench.java | 36 +
.../javax/crypto/small/AESGCMByteBuffer.java | 36 +
.../ssl/SSLSocketImpl/ClientTimeout.java | 3 +-
.../SSLSocketImpl/SSLExceptionForIOIssue.java | 4 +-
41 files changed, 2738 insertions(+), 216 deletions(-)
create mode 100644 hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java
create mode 100644 jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java
diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
index b0fa9b5fc..9202e61f8 100644
--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -146,6 +146,21 @@ REGISTER_DECLARATION(Register, esp, r20);
#define assert_cond(ARG1) assert(ARG1, #ARG1)
+// In many places we've added C-style casts to silence compiler
+// warnings, for example when truncating a size_t to an int when we
+// know the size_t is a small struct. Such casts are risky because
+// they effectively disable useful compiler warnings. We can make our
+// lives safer with this function, which ensures that any cast is
+// reversible without loss of information. It doesn't check
+// everything: it isn't intended to make sure that pointer types are
+// compatible, for example.
+template <typename T2, typename T1>
+T2 checked_cast(T1 thing) {
+ T2 result = static_cast<T2>(thing);
+ assert(static_cast<T1>(result) == thing, "must be");
+ return result;
+}
+
namespace asm_util {
uint32_t encode_logical_immediate(bool is32, uint64_t imm);
};
@@ -193,7 +208,7 @@ public:
static inline uint32_t extract(uint32_t val, int msb, int lsb) {
int nbits = msb - lsb + 1;
assert_cond(msb >= lsb);
- uint32_t mask = (1U << nbits) - 1;
+ uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
uint32_t result = val >> lsb;
result &= mask;
return result;
@@ -208,7 +223,7 @@ public:
int nbits = msb - lsb + 1;
guarantee(val < (1U << nbits), "Field too big for insn");
assert_cond(msb >= lsb);
- unsigned mask = (1U << nbits) - 1;
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
val <<= lsb;
mask <<= lsb;
unsigned target = *(unsigned *)a;
@@ -222,7 +237,7 @@ public:
long chk = val >> (nbits - 1);
guarantee (chk == -1 || chk == 0, "Field too big for insn");
unsigned uval = val;
- unsigned mask = (1U << nbits) - 1;
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
uval &= mask;
uval <<= lsb;
mask <<= lsb;
@@ -234,9 +249,9 @@ public:
void f(unsigned val, int msb, int lsb) {
int nbits = msb - lsb + 1;
- guarantee(val < (1U << nbits), "Field too big for insn");
+ guarantee(val < (1ULL << nbits), "Field too big for insn");
assert_cond(msb >= lsb);
- unsigned mask = (1U << nbits) - 1;
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
val <<= lsb;
mask <<= lsb;
insn |= val;
@@ -255,7 +270,7 @@ public:
long chk = val >> (nbits - 1);
guarantee (chk == -1 || chk == 0, "Field too big for insn");
unsigned uval = val;
- unsigned mask = (1U << nbits) - 1;
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
uval &= mask;
f(uval, lsb + nbits - 1, lsb);
}
@@ -280,7 +295,7 @@ public:
unsigned get(int msb = 31, int lsb = 0) {
int nbits = msb - lsb + 1;
- unsigned mask = ((1U << nbits) - 1) << lsb;
+ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
assert_cond((bits & mask) == mask);
return (insn & mask) >> lsb;
}
@@ -1991,21 +2006,21 @@ public:
starti;
f(0,31), f((int)T & 1, 30);
f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
- f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+ f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
}
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
int imm, int op1, int op2) {
starti;
f(0,31), f((int)T & 1, 30);
f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
- f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+ f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
}
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
Register Xm, int op1, int op2) {
starti;
f(0,31), f((int)T & 1, 30);
f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
- f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+ f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
}
void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
index 0ca694038..d334f1b69 100644
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -1240,6 +1240,23 @@ public:
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
Register zlen, Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
+ void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
+ FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
+ FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
+ void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
+ FloatRegister p, FloatRegister z, FloatRegister t1);
+ void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
+ Register data, Register blocks, int unrolls);
+ void ghash_modmul (FloatRegister result,
+ FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
+ FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
+ FloatRegister t1, FloatRegister t2, FloatRegister t3);
+
+ void aesenc_loadkeys(Register key, Register keylen);
+ void aesecb_encrypt(Register from, Register to, Register keylen,
+ FloatRegister data = v0, int unrolls = 1);
+ void aesecb_decrypt(Register from, Register to, Register key, Register keylen);
+ void aes_round(FloatRegister input, FloatRegister subkey);
// ISB may be needed because of a safepoint
void maybe_isb() { isb(); }
diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp
new file mode 100644
index 000000000..1db79c97a
--- /dev/null
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64_aes.cpp
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "macroAssembler_aarch64.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/stubRoutines.hpp"
+
+void MacroAssembler::aesecb_decrypt(Register from, Register to, Register key, Register keylen) {
+ Label L_doLast;
+
+ ld1(v0, T16B, from); // get 16 bytes of input
+
+ ld1(v5, T16B, post(key, 16));
+ rev32(v5, T16B, v5);
+
+ ld1(v1, v2, v3, v4, T16B, post(key, 64));
+ rev32(v1, T16B, v1);
+ rev32(v2, T16B, v2);
+ rev32(v3, T16B, v3);
+ rev32(v4, T16B, v4);
+ aesd(v0, v1);
+ aesimc(v0, v0);
+ aesd(v0, v2);
+ aesimc(v0, v0);
+ aesd(v0, v3);
+ aesimc(v0, v0);
+ aesd(v0, v4);
+ aesimc(v0, v0);
+
+ ld1(v1, v2, v3, v4, T16B, post(key, 64));
+ rev32(v1, T16B, v1);
+ rev32(v2, T16B, v2);
+ rev32(v3, T16B, v3);
+ rev32(v4, T16B, v4);
+ aesd(v0, v1);
+ aesimc(v0, v0);
+ aesd(v0, v2);
+ aesimc(v0, v0);
+ aesd(v0, v3);
+ aesimc(v0, v0);
+ aesd(v0, v4);
+ aesimc(v0, v0);
+
+ ld1(v1, v2, T16B, post(key, 32));
+ rev32(v1, T16B, v1);
+ rev32(v2, T16B, v2);
+
+ cmpw(keylen, 44);
+ br(Assembler::EQ, L_doLast);
+
+ aesd(v0, v1);
+ aesimc(v0, v0);
+ aesd(v0, v2);
+ aesimc(v0, v0);
+
+ ld1(v1, v2, T16B, post(key, 32));
+ rev32(v1, T16B, v1);
+ rev32(v2, T16B, v2);
+
+ cmpw(keylen, 52);
+ br(Assembler::EQ, L_doLast);
+
+ aesd(v0, v1);
+ aesimc(v0, v0);
+ aesd(v0, v2);
+ aesimc(v0, v0);
+
+ ld1(v1, v2, T16B, post(key, 32));
+ rev32(v1, T16B, v1);
+ rev32(v2, T16B, v2);
+
+ bind(L_doLast);
+
+ aesd(v0, v1);
+ aesimc(v0, v0);
+ aesd(v0, v2);
+
+ eor(v0, T16B, v0, v5);
+
+ st1(v0, T16B, to);
+
+ // Preserve the address of the start of the key
+ sub(key, key, keylen, LSL, exact_log2(sizeof (jint)));
+}
+
+// Load expanded key into v17..v31
+void MacroAssembler::aesenc_loadkeys(Register key, Register keylen) {
+ Label L_loadkeys_44, L_loadkeys_52;
+ cmpw(keylen, 52);
+ br(Assembler::LO, L_loadkeys_44);
+ br(Assembler::EQ, L_loadkeys_52);
+
+ ld1(v17, v18, T16B, post(key, 32));
+ rev32(v17, T16B, v17);
+ rev32(v18, T16B, v18);
+ bind(L_loadkeys_52);
+ ld1(v19, v20, T16B, post(key, 32));
+ rev32(v19, T16B, v19);
+ rev32(v20, T16B, v20);
+ bind(L_loadkeys_44);
+ ld1(v21, v22, v23, v24, T16B, post(key, 64));
+ rev32(v21, T16B, v21);
+ rev32(v22, T16B, v22);
+ rev32(v23, T16B, v23);
+ rev32(v24, T16B, v24);
+ ld1(v25, v26, v27, v28, T16B, post(key, 64));
+ rev32(v25, T16B, v25);
+ rev32(v26, T16B, v26);
+ rev32(v27, T16B, v27);
+ rev32(v28, T16B, v28);
+ ld1(v29, v30, v31, T16B, post(key, 48));
+ rev32(v29, T16B, v29);
+ rev32(v30, T16B, v30);
+ rev32(v31, T16B, v31);
+
+ // Preserve the address of the start of the key
+ sub(key, key, keylen, LSL, exact_log2(sizeof (jint)));
+}
+
+// Neoverse(TM) N1 Software Optimization Guide:
+// Adjacent AESE/AESMC instruction pairs and adjacent AESD/AESIMC
+// instruction pairs will exhibit the performance characteristics
+// described in Section 4.6.
+void MacroAssembler::aes_round(FloatRegister input, FloatRegister subkey) {
+ aese(input, subkey); aesmc(input, input);
+}
+
+// KernelGenerator
+//
+// The abstract base class of an unrolled function generator.
+// Subclasses override generate(), length(), and next() to generate
+// unrolled and interleaved functions.
+//
+// The core idea is that a subclass defines a method which generates
+// the base case of a function and a method to generate a clone of it,
+// shifted to a different set of registers. KernelGenerator will then
+// generate several interleaved copies of the function, with each one
+// using a different set of registers.
+
+// The subclass must implement three methods: length(), which is the
+// number of instruction bundles in the intrinsic, generate(int n)
+// which emits the nth instruction bundle in the intrinsic, and next()
+// which takes an instance of the generator and returns a version of it,
+// shifted to a new set of registers.
+
+class KernelGenerator: public MacroAssembler {
+protected:
+ const int _unrolls;
+public:
+ KernelGenerator(Assembler *as, int unrolls)
+ : MacroAssembler(as->code()), _unrolls(unrolls) { }
+ virtual void generate(int index) = 0;
+ virtual int length() = 0;
+ virtual KernelGenerator *next() = 0;
+ int unrolls() { return _unrolls; }
+ void unroll();
+};
+
+void KernelGenerator::unroll() {
+ ResourceMark rm;
+ KernelGenerator **generators
+ = NEW_RESOURCE_ARRAY(KernelGenerator *, unrolls());
+
+ generators[0] = this;
+ for (int i = 1; i < unrolls(); i++) {
+ generators[i] = generators[i-1]->next();
+ }
+
+ for (int j = 0; j < length(); j++) {
+ for (int i = 0; i < unrolls(); i++) {
+ generators[i]->generate(j);
+ }
+ }
+}
+
+// An unrolled and interleaved generator for AES encryption.
+class AESKernelGenerator: public KernelGenerator {
+ Register _from, _to;
+ const Register _keylen;
+ FloatRegister _data;
+ const FloatRegister _subkeys;
+ bool _once;
+ Label _rounds_44, _rounds_52;
+
+public:
+ AESKernelGenerator(Assembler *as, int unrolls,
+ Register from, Register to, Register keylen, FloatRegister data,
+ FloatRegister subkeys, bool once = true)
+ : KernelGenerator(as, unrolls),
+ _from(from), _to(to), _keylen(keylen), _data(data),
+ _subkeys(subkeys), _once(once) {
+ }
+
+ virtual void generate(int index) {
+ switch (index) {
+ case 0:
+ if (_from != noreg) {
+ ld1(_data, T16B, _from); // get 16 bytes of input
+ }
+ break;
+ case 1:
+ if (_once) {
+ cmpw(_keylen, 52);
+ br(Assembler::LO, _rounds_44);
+ br(Assembler::EQ, _rounds_52);
+ }
+ break;
+ case 2: aes_round(_data, _subkeys + 0); break;
+ case 3: aes_round(_data, _subkeys + 1); break;
+ case 4:
+ if (_once) bind(_rounds_52);
+ break;
+ case 5: aes_round(_data, _subkeys + 2); break;
+ case 6: aes_round(_data, _subkeys + 3); break;
+ case 7:
+ if (_once) bind(_rounds_44);
+ break;
+ case 8: aes_round(_data, _subkeys + 4); break;
+ case 9: aes_round(_data, _subkeys + 5); break;
+ case 10: aes_round(_data, _subkeys + 6); break;
+ case 11: aes_round(_data, _subkeys + 7); break;
+ case 12: aes_round(_data, _subkeys + 8); break;
+ case 13: aes_round(_data, _subkeys + 9); break;
+ case 14: aes_round(_data, _subkeys + 10); break;
+ case 15: aes_round(_data, _subkeys + 11); break;
+ case 16: aes_round(_data, _subkeys + 12); break;
+ case 17: aese(_data, _subkeys + 13); break;
+ case 18: eor(_data, T16B, _data, _subkeys + 14); break;
+ case 19:
+ if (_to != noreg) {
+ st1(_data, T16B, _to);
+ }
+ break;
+ default: ShouldNotReachHere();
+ }
+ }
+
+ virtual KernelGenerator *next() {
+ return new AESKernelGenerator(this, _unrolls,
+ _from, _to, _keylen,
+ _data + 1, _subkeys, /*once*/false);
+ }
+
+ virtual int length() { return 20; }
+};
+
+// Uses expanded key in v17..v31
+// Returns encrypted values in inputs.
+// If to != noreg, store value at to; likewise from
+// Preserves key, keylen
+// Does not advance from or to: the ld1/st1 emitted here use no post-index writeback
+// Input data in v0, v1, ...
+// unrolls controls the number of times to unroll the generated function
+void MacroAssembler::aesecb_encrypt(Register from, Register to, Register keylen,
+ FloatRegister data, int unrolls) {
+ AESKernelGenerator(this, unrolls, from, to, keylen, data, v17) .unroll();
+}
+
+// ghash_multiply and ghash_reduce are the non-unrolled versions of
+// the GHASH function generators (GHASHMultiplyGenerator / GHASHReduceGenerator).
+void MacroAssembler::ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
+ FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
+ FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3) {
+ // Karatsuba multiplication performs a 128*128 -> 256-bit
+ // multiplication in three 64*64 -> 128-bit multiplications and a few
+ // additions.
+ //
+ // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
+ // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
+ //
+ // Inputs:
+ //
+ // A0 in a.d[0] (subkey)
+ // A1 in a.d[1]
+ // (A1+A0) in a1_xor_a0.d[0]
+ //
+ // B0 in b.d[0] (state)
+ // B1 in b.d[1]
+
+ ext(tmp1, T16B, b, b, 0x08); // tmp1 = b with its two 64-bit halves swapped
+ pmull2(result_hi, T1Q, b, a, T2D); // A1*B1
+ eor(tmp1, T16B, tmp1, b); // (B1+B0)
+ pmull(result_lo, T1Q, b, a, T1D); // A0*B0
+ pmull(tmp2, T1Q, tmp1, a1_xor_a0, T1D); // (A1+A0)(B1+B0)
+
+ ext(tmp1, T16B, result_lo, result_hi, 0x08); // tmp1 = C0:D1
+ eor(tmp3, T16B, result_hi, result_lo); // A1*B1+A0*B0 (tmp3 may be the same register as b: b is not read past this point)
+ eor(tmp2, T16B, tmp2, tmp1);
+ eor(tmp2, T16B, tmp2, tmp3); // tmp2 = (C0+C1+D1+E1):(D1+C0+D0+E0), the middle 128 bits
+
+ // Register pair <result_hi:result_lo> holds the result of carry-less multiplication
+ ins(result_hi, D, tmp2, 0, 1); // result_hi = C1:(C0+C1+D1+E1)
+ ins(result_lo, D, tmp2, 1, 0); // result_lo = (D1+C0+D0+E0):D0
+}
+
+void MacroAssembler::ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
+ FloatRegister p, FloatRegister vzr, FloatRegister t1) {
+ const FloatRegister t0 = result; // result doubles as a scratch register until the final eor
+
+ // The GCM field polynomial f is z^128 + p(z), where p =
+ // z^7+z^2+z+1.
+ //
+ // z^128 === -p(z) (mod (z^128 + p(z)))
+ //
+ // so, given that the product we're reducing is
+ // a == lo + hi * z^128
+ // substituting,
+ // === lo - hi * p(z) (mod (z^128 + p(z)))
+ //
+ // we reduce by multiplying hi by p(z) and subtracting the result
+ // from (i.e. XORing it with) lo. Because p has no nonzero high
+ // bits we can do this with two 64-bit multiplications: first the
+ // high half of hi by p, then the low half. Clobbers lo, hi and t1.
+
+ pmull2(t0, T1Q, hi, p, T2D); // t0 = hi.d[1] * p
+ ext(t1, T16B, t0, vzr, 8); // t1.d[0] = t0.d[1], t1.d[1] = 0
+ eor(hi, T16B, hi, t1); // the part of the product above bit 127 folds back into hi.d[0]
+ ext(t1, T16B, vzr, t0, 8); // t1.d[0] = 0, t1.d[1] = t0.d[0]
+ eor(lo, T16B, lo, t1);
+ pmull(t0, T1Q, hi, p, T1D); // t0 = hi.d[0] * p (using the updated hi.d[0])
+ eor(result, T16B, lo, t0);
+}
+
+class GHASHMultiplyGenerator: public KernelGenerator { // unrollable form of ghash_multiply: one instruction per generate() index
+ FloatRegister _result_lo, _result_hi, _b,
+ _a, _vzr, _a1_xor_a0, _p,
+ _tmp1, _tmp2, _tmp3;
+
+public:
+ GHASHMultiplyGenerator(Assembler *as, int unrolls,
+ FloatRegister result_lo, FloatRegister result_hi,
+ /* per-clone registers (advanced by register_stride in next()) */
+ FloatRegister b,
+ /* shared registers (the same in every clone); p and vzr are stored but not read by generate() */
+ FloatRegister a, FloatRegister a1_xor_a0, FloatRegister p, FloatRegister vzr,
+ /* per-clone temp registers (advanced by register_stride in next()) */
+ FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3)
+ : KernelGenerator(as, unrolls),
+ _result_lo(result_lo), _result_hi(result_hi), _b(b),
+ _a(a), _vzr(vzr), _a1_xor_a0(a1_xor_a0), _p(p),
+ _tmp1(tmp1), _tmp2(tmp2), _tmp3(tmp3) { }
+
+ static const int register_stride = 7;
+
+ virtual void generate(int index) {
+ // Karatsuba multiplication performs a 128*128 -> 256-bit
+ // multiplication in three 64*64 -> 128-bit multiplications and a few
+ // additions.
+ //
+ // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
+ // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
+ //
+ // Inputs:
+ //
+ // A0 in a.d[0] (subkey)
+ // A1 in a.d[1]
+ // (A1+A0) in a1_xor_a0.d[0]
+ //
+ // B0 in b.d[0] (state)
+ // B1 in b.d[1]
+
+ switch (index) {
+ case 0: ext(_tmp1, T16B, _b, _b, 0x08); break; // _tmp1 = _b with its two 64-bit halves swapped
+ case 1: pmull2(_result_hi, T1Q, _b, _a, T2D); // A1*B1
+ break;
+ case 2: eor(_tmp1, T16B, _tmp1, _b); // (B1+B0)
+ break;
+ case 3: pmull(_result_lo, T1Q, _b, _a, T1D); // A0*B0
+ break;
+ case 4: pmull(_tmp2, T1Q, _tmp1, _a1_xor_a0, T1D); // (A1+A0)(B1+B0)
+ break;
+
+ case 5: ext(_tmp1, T16B, _result_lo, _result_hi, 0x08); break; // _tmp1 = C0:D1
+ case 6: eor(_tmp3, T16B, _result_hi, _result_lo); // A1*B1+A0*B0
+ break;
+ case 7: eor(_tmp2, T16B, _tmp2, _tmp1); break;
+ case 8: eor(_tmp2, T16B, _tmp2, _tmp3); break; // _tmp2 = (C0+C1+D1+E1):(D1+C0+D0+E0)
+
+ // Register pair <_result_hi:_result_lo> holds the result of carry-less multiplication
+ case 9: ins(_result_hi, D, _tmp2, 0, 1); break;
+ case 10: ins(_result_lo, D, _tmp2, 1, 0); break;
+ default: ShouldNotReachHere();
+ }
+ }
+
+ virtual KernelGenerator *next() {
+ GHASHMultiplyGenerator *result
+ = new GHASHMultiplyGenerator(this, _unrolls, _result_lo, _result_hi,
+ _b, _a, _a1_xor_a0, _p, _vzr,
+ _tmp1, _tmp2, _tmp3);
+ result->_result_lo += register_stride; // the next clone works on the register group register_stride further on
+ result->_result_hi += register_stride;
+ result->_b += register_stride;
+ result->_tmp1 += register_stride;
+ result->_tmp2 += register_stride;
+ result->_tmp3 += register_stride;
+ return result;
+ }
+
+ virtual int length() { return 11; } // number of cases handled by generate()
+};
+
+// Reduce the 256-bit product held in the register pair hi:lo by the
+// GCM field polynomial. The FloatRegister argument called data is
+// optional: if it is a valid register, we interleave LD1 instructions
+// with the reduction. This is to reduce latency next time around the loop.
+class GHASHReduceGenerator: public KernelGenerator {
+ FloatRegister _result, _lo, _hi, _p, _vzr, _data, _t1;
+ int _once; // true only in the first clone; next() clears it in every later one
+public:
+ GHASHReduceGenerator(Assembler *as, int unrolls,
+ /* per-clone registers (advanced by register_stride in next()) */
+ FloatRegister result, FloatRegister lo, FloatRegister hi,
+ /* shared registers (the same in every clone) */
+ FloatRegister p, FloatRegister vzr, FloatRegister data,
+ /* per-clone temp register (advanced by register_stride in next()) */
+ FloatRegister t1)
+ : KernelGenerator(as, unrolls),
+ _result(result), _lo(lo), _hi(hi),
+ _p(p), _vzr(vzr), _data(data), _t1(t1), _once(true) { }
+
+ static const int register_stride = 7;
+
+ virtual void generate(int index) {
+ const FloatRegister t0 = _result; // _result doubles as a scratch register until case 6
+
+ switch (index) {
+ // The GCM field polynomial f is z^128 + p(z), where p =
+ // z^7+z^2+z+1.
+ //
+ // z^128 === -p(z) (mod (z^128 + p(z)))
+ //
+ // so, given that the product we're reducing is
+ // a == lo + hi * z^128
+ // substituting,
+ // === lo - hi * p(z) (mod (z^128 + p(z)))
+ //
+ // we reduce by multiplying hi by p(z) and subtracting the result
+ // from (i.e. XORing it with) lo. Because p has no nonzero high
+ // bits we can do this with two 64-bit multiplications: first the
+ // high half of hi by p, then the low half.
+
+ case 0: pmull2(t0, T1Q, _hi, _p, T2D); break;
+ case 1: ext(_t1, T16B, t0, _vzr, 8); break;
+ case 2: eor(_hi, T16B, _hi, _t1); break;
+ case 3: ext(_t1, T16B, _vzr, t0, 8); break;
+ case 4: eor(_lo, T16B, _lo, _t1); break;
+ case 5: pmull(t0, T1Q, _hi, _p, T1D); break;
+ case 6: eor(_result, T16B, _lo, t0); break;
+ default: ShouldNotReachHere();
+ }
+
+ // Sprinkle load instructions into the generated instructions: only the first clone emits them, one per index below unrolls()
+ if (_data->is_valid() && _once) {
+ assert(length() >= unrolls(), "not enough room for inteleaved loads");
+ if (index < unrolls()) {
+ ld1((_data + index*register_stride), T16B, post(r2, 0x10)); // NOTE(review): the pointer is hard-coded as r2 rather than passed in -- presumably the caller's data register; confirm
+ }
+ }
+ }
+
+ virtual KernelGenerator *next() {
+ GHASHReduceGenerator *result
+ = new GHASHReduceGenerator(this, _unrolls,
+ _result, _lo, _hi, _p, _vzr, _data, _t1);
+ result->_result += register_stride;
+ result->_hi += register_stride;
+ result->_lo += register_stride;
+ result->_t1 += register_stride;
+ result->_once = false; // _data is deliberately not advanced: generate() offsets it by index instead
+ return result;
+ }
+
+ int length() { return 7; } // number of cases handled by generate()
+};
+
+// GHASH modular multiply of one block: result = a * b reduced by the field polynomial p (clobbers result_lo, result_hi, t1, t2, t3).
+void MacroAssembler::ghash_modmul(FloatRegister result,
+ FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
+ FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
+ FloatRegister t1, FloatRegister t2, FloatRegister t3) {
+ ghash_multiply(result_lo, result_hi, a, b, a1_xor_a0, t1, t2, t3); // 256-bit product in result_hi:result_lo
+ ghash_reduce(result, result_lo, result_hi, p, vzr, t1); // t1 is reused as the reduction temp
+}
+
+// Interleaved GHASH processing: runs `unrolls` independent GHASH chains
+// over interleaved blocks, then merges them using the powers of H.
+// Clobbers all vector registers.
+//
+void MacroAssembler::ghash_processBlocks_wide(address field_polynomial, Register state,
+ Register subkeyH,
+ Register data, Register blocks, int unrolls) {
+ int register_stride = 7;
+
+ // Bafflingly, GCM uses little-endian for the byte order, but
+ // big-endian for the bit order. For example, the polynomial 1 is
+ // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
+ //
+ // So, we must either reverse the bytes in each word and do
+ // everything big-endian or reverse the bits in each byte and do
+ // it little-endian. On AArch64 it's more idiomatic to reverse
+ // the bits in each byte (we have an instruction, RBIT, to do
+ // that) and keep the data in little-endian bit order throughout the
+ // calculation, bit-reversing the inputs and outputs.
+
+ assert(unrolls * register_stride < 32, "out of registers");
+
+ FloatRegister a1_xor_a0 = v28;
+ FloatRegister Hprime = v29;
+ FloatRegister vzr = v30;
+ FloatRegister p = v31;
+ eor(vzr, T16B, vzr, vzr); // zero register
+
+ ldrq(p, field_polynomial); // The field polynomial
+
+ ldrq(v0, Address(state));
+ ldrq(Hprime, Address(subkeyH));
+
+ rev64(v0, T16B, v0); // Bit-reverse words in state and subkeyH
+ rbit(v0, T16B, v0);
+ rev64(Hprime, T16B, Hprime);
+ rbit(Hprime, T16B, Hprime);
+
+ // Powers of H -> Hprime
+
+ Label already_calculated, done;
+ {
+ // The first time around we'll have to calculate H**2, H**3, etc.
+ // Look at the largest power of H in the subkeyH array to see if
+ // it's already been calculated (an all-zero slot means it has not).
+ ldp(rscratch1, rscratch2, Address(subkeyH, 16 * (unrolls - 1)));
+ orr(rscratch1, rscratch1, rscratch2);
+ cbnz(rscratch1, already_calculated);
+
+ orr(v6, T16B, Hprime, Hprime); // Start with H in v6 and Hprime
+ for (int i = 1; i < unrolls; i++) {
+ ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0
+ eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+ ghash_modmul(/*result*/v6, /*result_lo*/v5, /*result_hi*/v4, /*b*/v6,
+ Hprime, vzr, a1_xor_a0, p,
+ /*temps*/v1, v3, v2);
+ rev64(v1, T16B, v6);
+ rbit(v1, T16B, v1);
+ strq(v1, Address(subkeyH, 16 * i)); // cache H ** (i+1) in slot i of the subkeyH array
+ }
+ b(done);
+ }
+ {
+ bind(already_calculated);
+
+ // Load the largest power of H we need into v6.
+ ldrq(v6, Address(subkeyH, 16 * (unrolls - 1)));
+ rev64(v6, T16B, v6);
+ rbit(v6, T16B, v6);
+ }
+ bind(done);
+
+ orr(Hprime, T16B, v6, v6); // Move H ** unrolls into Hprime
+
+ // The subkeyH array now holds (H ** 1, H ** 2, ... H ** unrolls); Hprime holds H ** unrolls.
+ // v0 contains the initial state. Clear the others.
+ for (int i = 1; i < unrolls; i++) {
+ int ofs = register_stride * i;
+ eor(ofs+v0, T16B, ofs+v0, ofs+v0); // zero each state register
+ }
+
+ ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0
+ eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+
+ // Load #unrolls blocks of data
+ for (int ofs = 0; ofs < unrolls * register_stride; ofs += register_stride) {
+ ld1(v2+ofs, T16B, post(data, 0x10));
+ }
+
+ // Register assignments, replicated across the #unrolls clones at a stride of register_stride (7) registers
+ //
+ // v0: input / output: current state, result of multiply/reduce
+ // v1: temp
+ // v2: input: one block of data (the ciphertext)
+ // also used as a temp once the data has been consumed
+ // v3: temp
+ // v4: output: high part of product
+ // v5: output: low part ...
+ // v6: unused
+ //
+ // Not replicated:
+ //
+ // v28: High part of H' xor low part of H' (the Karatsuba (A1+A0) term)
+ // v29: H' (hash subkey)
+ // v30: zero
+ // v31: Reduction polynomial of the Galois field
+
+ // Inner loop.
+ // Do the whole load/add/multiply/reduce over all our data except
+ // the last few rows.
+ {
+ Label L_ghash_loop;
+ bind(L_ghash_loop);
+
+ // Prefetching doesn't help here. In fact, on Neoverse N1 it's worse.
+ // prfm(Address(data, 128), PLDL1KEEP);
+
+ // Xor data into current state
+ for (int ofs = 0; ofs < unrolls * register_stride; ofs += register_stride) {
+ rbit((v2+ofs), T16B, (v2+ofs));
+ eor((v2+ofs), T16B, v0+ofs, (v2+ofs)); // bit-swapped data ^ bit-swapped state
+ }
+
+ // Generate fully-unrolled multiply-reduce in two stages.
+
+ (new GHASHMultiplyGenerator(this, unrolls,
+ /*result_lo*/v5, /*result_hi*/v4, /*data*/v2,
+ Hprime, a1_xor_a0, p, vzr,
+ /*temps*/v1, v3, /* reuse b*/v2))->unroll();
+
+ // NB: GHASHReduceGenerator also loads the next #unrolls blocks of
+ // data into v2, v2+ofs (the data registers); the state stays in v0, v0+ofs.
+ (new GHASHReduceGenerator (this, unrolls,
+ /*result*/v0, /*lo*/v5, /*hi*/v4, p, vzr,
+ /*data*/v2, /*temp*/v3))->unroll();
+
+ sub(blocks, blocks, unrolls);
+ cmp(blocks, (unsigned char)(unrolls * 2)); // keep looping while at least 2 * unrolls blocks remain
+ br(GE, L_ghash_loop);
+ }
+
+ // Merge the #unrolls states. Note that the data for the next
+ // iteration has already been loaded into v2, v2+ofs, etc...
+
+ // First, we multiply/reduce each clone by the appropriate power of H.
+ for (int i = 0; i < unrolls; i++) {
+ int ofs = register_stride * i;
+ ldrq(Hprime, Address(subkeyH, 16 * (unrolls - i - 1))); // clone i is multiplied by H ** (unrolls - i)
+
+ rbit(v2+ofs, T16B, v2+ofs);
+ eor(v2+ofs, T16B, ofs+v0, v2+ofs); // bit-swapped data ^ bit-swapped state
+
+ rev64(Hprime, T16B, Hprime);
+ rbit(Hprime, T16B, Hprime);
+ ext(a1_xor_a0, T16B, Hprime, Hprime, 0x08); // long-swap subkeyH into a1_xor_a0
+ eor(a1_xor_a0, T16B, a1_xor_a0, Hprime); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
+ ghash_modmul(/*result*/v0+ofs, /*result_lo*/v5+ofs, /*result_hi*/v4+ofs, /*b*/v2+ofs,
+ Hprime, vzr, a1_xor_a0, p,
+ /*temps*/v1+ofs, v3+ofs, /* reuse b*/v2+ofs);
+ }
+
+ // Then we sum the results.
+ for (int i = 0; i < unrolls - 1; i++) {
+ int ofs = register_stride * i;
+ eor(v0, T16B, v0, v0 + register_stride + ofs);
+ }
+
+ sub(blocks, blocks, (unsigned char)unrolls); // account for the final batch consumed by the merge above
+
+ // And finally bit-reverse the state back to big endian.
+ rev64(v0, T16B, v0);
+ rbit(v0, T16B, v0);
+ st1(v0, T16B, state);
+}
\ No newline at end of file
diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
index 2e2e8ae78..c024dec55 100644
--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
@@ -2804,6 +2804,266 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // CTR AES crypt.
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - counter vector byte array address
+ // c_rarg4 - input length
+ // c_rarg5 - saved encryptedCounter start
+ // c_rarg6 - address of the saved used length (read on entry, written back on exit)
+ //
+ // Output:
+ // r0 - input length
+ //
+ address generate_counterMode_AESCrypt() {
+ const Register in = c_rarg0;
+ const Register out = c_rarg1;
+ const Register key = c_rarg2;
+ const Register counter = c_rarg3;
+ const Register saved_len = c_rarg4, len = r10;
+ const Register saved_encrypted_ctr = c_rarg5;
+ const Register used_ptr = c_rarg6, used = r12;
+
+ const Register offset = r7;
+ const Register keylen = r11;
+
+ const unsigned char block_size = 16;
+ const int bulk_width = 4;
+ // NB: bulk_width can be 4 or 8. 8 gives slightly faster
+ // performance with larger data sizes, but it also means that the
+ // fast path isn't used until you have at least 8 blocks, and up
+ // to 127 bytes of data will be executed on the slow path. For
+ // that reason, and also so as not to blow away too much icache, 4
+ // blocks seems like a sensible compromise.
+
+ // Algorithm:
+ //
+ // if (len == 0) {
+ // goto DONE;
+ // }
+ // int result = len;
+ // do {
+ // if (used >= blockSize) {
+ // if (len >= bulk_width * blockSize) {
+ // CTR_large_block();
+ // if (len == 0)
+ // goto DONE;
+ // }
+ // for (;;) {
+ // 16ByteVector v0 = counter;
+ // embeddedCipher.encryptBlock(v0, 0, encryptedCounter, 0);
+ // used = 0;
+ // if (len < blockSize)
+ // break; /* goto NEXT */
+ // 16ByteVector v1 = load16Bytes(in, offset);
+ // v1 = v1 ^ encryptedCounter;
+ // store16Bytes(out, offset, v1);
+ // used = blockSize;
+ // offset += blockSize;
+ // len -= blockSize;
+ // if (len == 0)
+ // goto DONE;
+ // }
+ // }
+ // NEXT:
+ // out[outOff++] = (byte)(in[inOff++] ^ encryptedCounter[used++]);
+ // len--;
+ // } while (len != 0);
+ // DONE:
+ // return result;
+ //
+ // CTR_large_block()
+ // Wide bulk encryption of whole blocks.
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
+ const address start = __ pc();
+ __ enter();
+
+ Label DONE, CTR_large_block, large_block_return;
+ __ ldrw(used, Address(used_ptr));
+ __ cbzw(saved_len, DONE);
+
+ __ mov(len, saved_len);
+ __ mov(offset, 0);
+
+ // Compute #rounds for AES based on the length of the key array
+ __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+ __ aesenc_loadkeys(key, keylen);
+
+ {
+ Label L_CTR_loop, NEXT;
+
+ __ bind(L_CTR_loop);
+
+ __ cmp(used, block_size);
+ __ br(__ LO, NEXT); // bytes of the saved encrypted counter are still unused: take the byte-at-a-time path
+
+ // Maybe we have a lot of data
+ __ subsw(rscratch1, len, bulk_width * block_size);
+ __ br(__ HS, CTR_large_block);
+ __ BIND(large_block_return);
+ __ cbzw(len, DONE);
+
+ // Setup the counter increment. NOTE(review): only the last 32-bit word is bumped (T4S add of { 0, 0, 0, 1 }), with no carry into the higher words -- confirm this matches the Java fallback on wrap-around
+ __ movi(v4, __ T4S, 0);
+ __ movi(v5, __ T4S, 1);
+ __ ins(v4, __ S, v5, 3, 3); // v4 contains { 0, 0, 0, 1 }
+
+ __ ld1(v0, __ T16B, counter); // Load the counter into v0
+ __ rev32(v16, __ T16B, v0);
+ __ addv(v16, __ T4S, v16, v4);
+ __ rev32(v16, __ T16B, v16);
+ __ st1(v16, __ T16B, counter); // Save the incremented counter back
+
+ {
+ // We have fewer than bulk_width blocks of data left. Encrypt
+ // them one by one until there is less than a full block
+ // remaining, being careful to save both the encrypted counter
+ // and the counter.
+
+ Label inner_loop;
+ __ bind(inner_loop);
+ // Counter to encrypt is in v0
+ __ aesecb_encrypt(noreg, noreg, keylen);
+ __ st1(v0, __ T16B, saved_encrypted_ctr);
+
+ // Do we have a remaining full block?
+
+ __ mov(used, 0);
+ __ cmp(len, block_size);
+ __ br(__ LO, NEXT);
+
+ // Yes, we have a full block
+ __ ldrq(v1, Address(in, offset));
+ __ eor(v1, __ T16B, v1, v0);
+ __ strq(v1, Address(out, offset));
+ __ mov(used, block_size);
+ __ add(offset, offset, block_size);
+
+ __ subw(len, len, block_size);
+ __ cbzw(len, DONE);
+
+ // Increment the counter, store it back
+ __ orr(v0, __ T16B, v16, v16); // v0 = the already-incremented counter saved in v16
+ __ rev32(v16, __ T16B, v16);
+ __ addv(v16, __ T4S, v16, v4);
+ __ rev32(v16, __ T16B, v16);
+ __ st1(v16, __ T16B, counter); // Save the incremented counter back
+
+ __ b(inner_loop);
+ }
+
+ __ BIND(NEXT);
+
+ // Encrypt a single byte, and loop.
+ // We expect this to be a rare event.
+ __ ldrb(rscratch1, Address(in, offset));
+ __ ldrb(rscratch2, Address(saved_encrypted_ctr, used));
+ __ eor(rscratch1, rscratch1, rscratch2);
+ __ strb(rscratch1, Address(out, offset));
+ __ add(offset, offset, 1);
+ __ add(used, used, 1);
+ __ subw(len, len,1);
+ __ cbnzw(len, L_CTR_loop);
+ }
+
+ __ bind(DONE);
+ __ strw(used, Address(used_ptr)); // write the updated used count back for the next call
+ __ mov(r0, saved_len);
+
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(lr);
+
+ // Bulk encryption
+
+ __ BIND (CTR_large_block);
+ assert(bulk_width == 4 || bulk_width == 8, "must be");
+
+ if (bulk_width == 8) {
+ __ sub(sp, sp, 4 * 16);
+ __ st1(v12, v13, v14, v15, __ T16B, Address(sp));
+ }
+ __ sub(sp, sp, 4 * 16);
+ __ st1(v8, v9, v10, v11, __ T16B, Address(sp));
+ RegSet saved_regs = (RegSet::of(in, out, offset)
+ + RegSet::of(saved_encrypted_ctr, used_ptr, len));
+ __ push(saved_regs, sp);
+ __ andr(len, len, -16 * bulk_width); // 8/4 encryptions, 16 bytes per encryption
+ __ add(in, in, offset);
+ __ add(out, out, offset);
+
+ // Keys should already be loaded into the correct registers
+
+ __ ld1(v0, __ T16B, counter); // v0 contains the first counter
+ __ rev32(v16, __ T16B, v0); // v16 contains byte-reversed counter
+
+ // AES/CTR loop
+ {
+ Label L_CTR_loop;
+ __ BIND(L_CTR_loop);
+
+ // Setup the counter increment (rebuilt every iteration: v8 and v9 are overwritten by the input load below)
+ __ movi(v8, __ T4S, 0);
+ __ movi(v9, __ T4S, 1);
+ __ ins(v8, __ S, v9, 3, 3); // v8 contains { 0, 0, 0, 1 }
+
+ for (FloatRegister f = v0; f < v0 + bulk_width; f++) {
+ __ rev32(f, __ T16B, v16);
+ __ addv(v16, __ T4S, v16, v8);
+ }
+
+ __ ld1(v8, v9, v10, v11, __ T16B, __ post(in, 4 * 16));
+
+ // Encrypt the counters
+ __ aesecb_encrypt(noreg, noreg, keylen, v0, bulk_width);
+
+ if (bulk_width == 8) {
+ __ ld1(v12, v13, v14, v15, __ T16B, __ post(in, 4 * 16));
+ }
+
+ // XOR the encrypted counters with the inputs
+ for (int i = 0; i < bulk_width; i++) {
+ __ eor(v0 + i, __ T16B, v0 + i, v8 + i);
+ }
+
+ // Write the encrypted data
+ __ st1(v0, v1, v2, v3, __ T16B, __ post(out, 4 * 16));
+ if (bulk_width == 8) {
+ __ st1(v4, v5, v6, v7, __ T16B, __ post(out, 4 * 16));
+ }
+
+ __ subw(len, len, 16 * bulk_width);
+ __ cbnzw(len, L_CTR_loop);
+ }
+
+ // Save the counter back where it goes
+ __ rev32(v16, __ T16B, v16);
+ __ st1(v16, __ T16B, counter);
+
+ __ pop(saved_regs, sp);
+
+ __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16));
+ if (bulk_width == 8) {
+ __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16));
+ }
+
+ __ andr(rscratch1, len, -16 * bulk_width); // len was restored by the pop above: rscratch1 = bytes handled in bulk
+ __ sub(len, len, rscratch1);
+ __ add(offset, offset, rscratch1);
+ __ mov(used, 16); // mark the saved encrypted counter as fully consumed
+ __ strw(used, Address(used_ptr));
+ __ b(large_block_return);
+
+ return start;
+ }
+
+
// Arguments:
//
// Inputs:
@@ -3677,6 +3937,56 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address generate_ghash_processBlocks_wide() {
+ address small = generate_ghash_processBlocks(); // plain stub, used for short inputs and for the leftover blocks
+
+ StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks_wide");
+ __ align(wordSize * 2);
+ address p = __ pc();
+ __ emit_int64(0x87); // The low-order bits of the field
+ // polynomial (i.e. p = z^7+z^2+z+1)
+ // repeated in the low and high parts of a
+ // 128-bit vector
+ __ emit_int64(0x87);
+
+ __ align(CodeEntryAlignment);
+ address start = __ pc();
+
+ Register state = c_rarg0;
+ Register subkeyH = c_rarg1;
+ Register data = c_rarg2;
+ Register blocks = c_rarg3;
+
+ const int unroll = 4;
+
+ __ cmp(blocks, (unsigned char)(unroll * 2));
+ __ br(__ LT, small); // fewer than 2 * unroll blocks: let the plain stub do all the work
+
+ if (unroll > 1) {
+ // Save v8..v15 before entering the routine (it clobbers all vector registers)
+ __ sub(sp, sp, 4 * 16);
+ __ st1(v12, v13, v14, v15, __ T16B, Address(sp));
+ __ sub(sp, sp, 4 * 16);
+ __ st1(v8, v9, v10, v11, __ T16B, Address(sp));
+ }
+
+ __ ghash_processBlocks_wide(p, state, subkeyH, data, blocks, unroll);
+
+ if (unroll > 1) {
+ // And restore v8..v15, in the reverse order of the saves
+ __ ld1(v8, v9, v10, v11, __ T16B, __ post(sp, 4 * 16));
+ __ ld1(v12, v13, v14, v15, __ T16B, __ post(sp, 4 * 16));
+ }
+
+ __ cmp(blocks, 0u);
+ __ br(__ GT, small); // blocks left over by the wide routine are finished by the plain stub
+
+ __ ret(lr);
+
+ return start;
+ }
+
+
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
@@ -4687,6 +4997,15 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_montgomerySquare = g.generate_multiply();
}
+ // generate GHASH intrinsics code
+ if (UseGHASHIntrinsics) {
+ if (UseAESCTRIntrinsics) {
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks_wide();
+ } else {
+ StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+ }
+ }
+
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
@@ -4694,9 +5013,8 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}
- // generate GHASH intrinsics code
- if (UseGHASHIntrinsics) {
- StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+ if (UseAESCTRIntrinsics) {
+ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
}
if (UseSHA1Intrinsics) {
diff --git a/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp
index d1c312ab3..05619ce7f 100644
--- a/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp
+++ b/hotspot/src/cpu/aarch64/vm/stubRoutines_aarch64.hpp
@@ -37,7 +37,7 @@ static bool returns_to_call_stub(address return_pc) {
enum platform_dependent_constants {
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 32000 // simply increase if too small (assembler will crash if too small)
};
class aarch64 {
diff --git a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
index 9808337a0..de636fb83 100644
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp
@@ -233,12 +233,21 @@ void VM_Version::get_processor_features() {
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
UseAES = true;
}
+ if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
} else {
if (UseAES) {
- warning("UseAES specified, but not supported on this CPU");
+ warning("AES instructions are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics) {
- warning("UseAESIntrinsics specified, but not supported on this CPU");
+ warning("AES intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+ }
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
}
}
diff --git a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
index b5ce1cfa9..fea8b1f87 100644
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp
@@ -194,6 +194,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+
if (UseGHASHIntrinsics) {
warning("GHASH intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
index bd893e138..08d7a7311 100644
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp
@@ -319,6 +319,11 @@ void VM_Version::initialize() {
}
}
+ if (UseAESCTRIntrinsics) {
+ warning("AES/CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+
// GHASH/GCM intrinsics
if (has_vis3() && (UseVIS > 2)) {
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
index 1759ecdfd..ddc1acfd8 100644
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
@@ -2373,20 +2373,52 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
+void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+ emit_int8(0x16);
+ emit_operand(src, dst);
+ emit_int8(imm8);
+}
+
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
emit_int8(0x16);
emit_int8((unsigned char)(0xC0 | encode));
emit_int8(imm8);
}
+void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+ emit_int8(0x16);
+ emit_operand(src, dst);
+ emit_int8(imm8);
+}
+
+void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ emit_int8((unsigned char)0x15);
+ emit_operand(src, dst);
+ emit_int8(imm8);
+}
+
+void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ emit_int8(0x14);
+ emit_operand(src, dst);
+ emit_int8(imm8);
+}
+
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
@@ -2395,6 +2427,14 @@ void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
emit_int8(imm8);
}
+void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+ emit_int8(0x22);
+ emit_operand(dst,src);
+ emit_int8(imm8);
+}
+
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
@@ -2403,6 +2443,30 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
emit_int8(imm8);
}
+void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+ emit_int8(0x22);
+ emit_operand(dst, src);
+ emit_int8(imm8);
+}
+
+void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
+ assert(VM_Version::supports_sse2(), "");
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F);
+ emit_int8((unsigned char)0xC4);
+ emit_operand(dst, src);
+ emit_int8(imm8);
+}
+
+void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+ emit_int8(0x20);
+ emit_operand(dst, src);
+ emit_int8(imm8);
+}
+
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
@@ -3075,6 +3139,12 @@ void Assembler::xorl(Register dst, Register src) {
emit_arith(0x33, 0xC0, dst, src);
}
+void Assembler::xorb(Register dst, Address src) {
+ InstructionMark im(this);
+ prefix(src, dst);
+ emit_int8(0x32);
+ emit_operand(dst, src);
+}
// AVX 3-operands scalar float-point arithmetic instructions
diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
index 5ea01311e..c2e70bc2a 100644
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
@@ -1479,10 +1479,20 @@ private:
// SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8);
+ void pextrd(Address dst, XMMRegister src, int imm8);
+ void pextrq(Address dst, XMMRegister src, int imm8);
+ void pextrb(Address dst, XMMRegister src, int imm8);
+ // SSE 4.1 extract (memory-destination form; the implementation asserts supports_sse4_1)
+ void pextrw(Address dst, XMMRegister src, int imm8);
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
+ void pinsrd(XMMRegister dst, Address src, int imm8);
+ void pinsrq(XMMRegister dst, Address src, int imm8);
+ void pinsrb(XMMRegister dst, Address src, int imm8);
+ // SSE 2 insert
+ void pinsrw(XMMRegister dst, Address src, int imm8);
// SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src);
@@ -1687,6 +1697,8 @@ private:
void xorl(Register dst, Address src);
void xorl(Register dst, Register src);
+ void xorb(Register dst, Address src);
+
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 2e5599807..f555f3326 100644
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -2153,6 +2153,17 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address generate_counter_shuffle_mask() { // emits the 16-byte pshufb mask used on the CTR counter; returns its address
+ __ align(16);
+ StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
+ address start = __ pc();
+ __ emit_data(0x0c0d0e0f, relocInfo::none, 0); // in little-endian memory order the four words give mask bytes 0f,0e,...,00,
+ __ emit_data(0x08090a0b, relocInfo::none, 0); // i.e. a pshufb with this mask reverses all 16 bytes of the register
+ __ emit_data(0x04050607, relocInfo::none, 0);
+ __ emit_data(0x00010203, relocInfo::none, 0);
+ return start;
+ }
+
// Utility routine for loading a 128-bit key word in little endian format
// can optionally specify that the shuffle mask is already in an xmmregister
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
@@ -2178,6 +2189,31 @@ class StubGenerator: public StubCodeGenerator {
__ aesdec(xmmdst, xmmtmp);
}
+ // Utility routine: add inc_delta to the 128-bit counter (IV in CTR mode) held in xmmdst, using reg as scratch.
+ // The register is treated as four dwords D3:D2:D1:D0 with D0 least significant; next_block is bound at the end.
+ void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
+ __ pextrd(reg, xmmdst, 0x0); // D0 -> reg
+ __ addl(reg, inc_delta); // sets the carry flag on 32-bit overflow
+ __ pinsrd(xmmdst, reg, 0x0); // write D0 back; the jcc below relies on the carry flag from addl surviving this
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
+
+ __ pextrd(reg, xmmdst, 0x01); // Carry-> D1
+ __ addl(reg, 0x01);
+ __ pinsrd(xmmdst, reg, 0x01);
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
+
+ __ pextrd(reg, xmmdst, 0x02); // Carry-> D2
+ __ addl(reg, 0x01);
+ __ pinsrd(xmmdst, reg, 0x02);
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
+
+ __ pextrd(reg, xmmdst, 0x03); // Carry -> D3
+ __ addl(reg, 0x01); // a carry out of D3 is dropped (the counter wraps)
+ __ pinsrd(xmmdst, reg, 0x03);
+
+ __ BIND(next_block); // next instruction
+ }
+
// Arguments:
//
@@ -2719,6 +2755,309 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+
+ // AES counter-mode (CTR) crypt stub for 32-bit x86.
+ // Handles 4 blocks per main-loop iteration, then single blocks, then a partial tail.
+ // Arguments:
+ //
+ // Inputs (all read from the stack relative to rbp; see the *_param addresses below):
+ // arg0 (rbp+8) - source byte array address
+ // arg1 (rbp+12) - destination byte array address
+ // arg2 (rbp+16) - K (key) in little endian int array
+ // arg3 (rbp+20) - counter vector byte array address
+ // arg4 (rbp+24) - input length; arg5 (rbp+28) - saved encrypted counter; arg6 (rbp+32) - address of 'used' count
+ //
+ // Output:
+ // rax - input length
+ //
+ address generate_counterMode_AESCrypt_Parallel() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
+ address start = __ pc();
+ const Register from = rsi; // source array address
+ const Register to = rdx; // destination array address
+ const Register key = rcx; // key array address
+ const Register counter = rdi; // counter byte array initialized from initvector array address
+
+ // (the updated counter is written back to the counter array at L_exit)
+ const Register len_reg = rbx; // remaining input length in bytes
+ const Register pos = rax; // byte offset into from/to used by the block loops
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi
+
+ // load registers from incoming parameters
+ const Address from_param(rbp, 8+0);
+ const Address to_param (rbp, 8+4);
+ const Address key_param (rbp, 8+8);
+ const Address rvec_param (rbp, 8+12);
+ const Address len_param (rbp, 8+16);
+ const Address saved_counter_param(rbp, 8 + 20); // buffer holding the last encrypted counter block
+ const Address used_addr_param(rbp, 8 + 24); // address of the count of bytes already consumed from that buffer
+
+ __ movptr(from , from_param);
+ __ movptr(to , to_param);
+ //__ movptr(key, key_param);
+ //__ movptr(counter, rvec_param);
+ __ movptr(len_reg , len_param);
+ //__ movptr(pos, 0);
+
+ // Use the partially used encrypted counter from last invocation
+ Label L_exit_preLoop, L_preLoop_start;
+
+ // Use the registers 'counter' and 'key' here in this preloop
+ // to hold the last 2 params 'used' and 'saved_encCounter_start'
+ Register used = counter;
+ Register saved_encCounter_start = key;
+ Register used_addr = saved_encCounter_start; // same register as saved_encCounter_start (and key)
+
+ __ movptr(used_addr, used_addr_param);
+ __ movptr(used, Address(used_addr, 0)); // load the current 'used' count
+ __ movptr(saved_encCounter_start, saved_counter_param); // overwrites used_addr, which shares this register
+
+ __ BIND(L_preLoop_start);
+ __ cmpptr(used, 16); // stop once 'used' reaches 16
+ __ jcc(Assembler::aboveEqual, L_exit_preLoop);
+ __ cmpptr(len_reg, 0); // ... or once no input is left
+ __ jcc(Assembler::lessEqual, L_exit_preLoop);
+ __ movb(rax, Address(saved_encCounter_start, used)); // next unused byte of the saved encrypted counter (rax is scratch here)
+ __ xorb(rax, Address(from, 0)); // XOR with the next input byte
+ __ movb(Address(to, 0), rax); // store the output byte
+ __ addptr(from, 1); // from/to themselves are advanced here; pos is not used in the preloop
+ __ addptr(to, 1);
+ __ addptr(used, 1);
+ __ subptr(len_reg, 1);
+
+ __ jmp(L_preLoop_start);
+
+ __ BIND(L_exit_preLoop);
+ __ movptr(used_addr, used_addr_param); // reload: the register was overwritten with saved_encCounter_start above
+ __ movptr(used_addr, used_addr_param); // NOTE(review): identical to the previous instruction; appears redundant
+ __ movl(Address(used_addr, 0), used); // write the updated 'used' count back
+
+ // load the parameters 'key' and 'counter'
+ __ movptr(key, key_param);
+ __ movptr(counter, rvec_param);
+
+ // xmm register assignments for the loops below
+ const XMMRegister xmm_curr_counter = xmm0;
+ const XMMRegister xmm_counter_shuf_mask = xmm1; // need to be reloaded
+ const XMMRegister xmm_key_shuf_mask = xmm2; // need to be reloaded
+ const XMMRegister xmm_key = xmm3;
+ const XMMRegister xmm_result0 = xmm4;
+ const XMMRegister xmm_result1 = xmm5;
+ const XMMRegister xmm_result2 = xmm6;
+ const XMMRegister xmm_result3 = xmm7;
+ const XMMRegister xmm_from0 = xmm1; //reuse XMM register
+ const XMMRegister xmm_from1 = xmm2;
+ const XMMRegister xmm_from2 = xmm3;
+ const XMMRegister xmm_from3 = xmm4;
+
+ //for key_128, key_192, key_256
+ const int rounds[3] = {10, 12, 14};
+ Label L_singleBlockLoopTop[3];
+ Label L_multiBlock_loopTop[3];
+ Label L_key192_top, L_key256_top;
+ Label L_incCounter[3][4]; // 3: different key length, 4: 4 blocks at a time
+ Label L_incCounter_single[3]; //for single block, key128, key192, key256
+ Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
+ Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; // NOTE(review): L_processTail_extr is not referenced below
+
+ Label L_exit;
+ const int PARALLEL_FACTOR = 4; //because of the limited register number
+
+ // initialize counter with initial counter
+ __ movdqu(xmm_curr_counter, Address(counter, 0x00));
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase
+
+ // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rax, 52);
+ __ jcc(Assembler::equal, L_key192_top);
+ __ cmpl(rax, 60);
+ __ jcc(Assembler::equal, L_key256_top);
+
+ //key128 begins here
+ __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
+
+#define CTR_DoFour(opc, src_reg) \
+ __ opc(xmm_result0, src_reg); \
+ __ opc(xmm_result1, src_reg); \
+ __ opc(xmm_result2, src_reg); \
+ __ opc(xmm_result3, src_reg);
+
+ // k == 0 : generate code for key_128
+ // k == 1 : generate code for key_192
+ // k == 2 : generate code for key_256
+ for (int k = 0; k < 3; ++k) {
+ //multi blocks starts here
+ __ align(OptoLoopAlignment);
+ __ BIND(L_multiBlock_loopTop[k]);
+ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
+ __ jcc(Assembler::less, L_singleBlockLoopTop[k]);
+
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // reloaded each iteration: xmm1/xmm2 double as xmm_from0/xmm_from1
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+
+ //load, then increase counters
+ CTR_DoFour(movdqa, xmm_curr_counter); // all four results start as the current counter
+ __ push(rbx); // rbx is len_reg; preserve it while it serves as inc_counter scratch
+ inc_counter(rbx, xmm_result1, 0x01, L_incCounter[k][0]); // result1..3 = counter+1..+3; result0 stays counter+0
+ inc_counter(rbx, xmm_result2, 0x02, L_incCounter[k][1]);
+ inc_counter(rbx, xmm_result3, 0x03, L_incCounter[k][2]);
+ inc_counter(rbx, xmm_curr_counter, 0x04, L_incCounter[k][3]); // advance the running counter for the next iteration
+ __ pop (rbx);
+
+ load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); // load Round 0 key. interleaving for better performance
+
+ CTR_DoFour(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR
+ CTR_DoFour(pxor, xmm_key); //PXOR with Round 0 key
+
+ for (int i = 1; i < rounds[k]; ++i) { // middle rounds 1 .. rounds[k]-1
+ load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask);
+ CTR_DoFour(aesenc, xmm_key);
+ }
+ load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); // final round key
+ CTR_DoFour(aesenclast, xmm_key);
+
+ // get next PARALLEL_FACTOR blocks into xmm_from registers
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
+ __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
+ __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
+
+ // PXOR with input text
+ __ pxor(xmm_result0, xmm_from0); //result0 is xmm4
+ __ pxor(xmm_result1, xmm_from1);
+ __ pxor(xmm_result2, xmm_from2);
+
+ // store PARALLEL_FACTOR results into the next 64 bytes of output
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
+ __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
+ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
+
+ // do it here after xmm_result0 is saved, because xmm_from3 reuses the same register as xmm_result0.
+ __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
+ __ pxor(xmm_result3, xmm_from3);
+ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
+
+ __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text
+ __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
+ __ jmp(L_multiBlock_loopTop[k]);
+
+ // singleBlock starts here
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlockLoopTop[k]);
+ __ cmpptr(len_reg, 0);
+ __ jcc(Assembler::equal, L_exit); // nothing left: done
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); // reload masks (xmm1/xmm2 are also used as xmm_from0/xmm_from1)
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+ __ movdqa(xmm_result0, xmm_curr_counter); // this block uses the current counter value
+ load_key(xmm_key, key, 0x00, xmm_key_shuf_mask);
+ __ push(rbx);//rbx is used for increasing counter
+ inc_counter(rbx, xmm_curr_counter, 0x01, L_incCounter_single[k]);
+ __ pop (rbx);
+ __ pshufb(xmm_result0, xmm_counter_shuf_mask); // shuffle the counter copy back before encrypting it
+ __ pxor(xmm_result0, xmm_key); // round 0
+ for (int i = 1; i < rounds[k]; i++) {
+ load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask);
+ __ aesenc(xmm_result0, xmm_key);
+ }
+ load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask);
+ __ aesenclast(xmm_result0, xmm_key); // xmm_result0 now holds the encrypted counter
+ __ cmpptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::less, L_processTail_insr[k]); // less than one full block left: handle the partial tail
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
+ __ pxor(xmm_result0, xmm_from0);
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_singleBlockLoopTop[k]);
+
+ __ BIND(L_processTail_insr[k]); // gather the remaining len_reg input bytes into xmm_from0, walking backwards from the end
+ __ addptr(pos, len_reg); // pos now points just past the last input byte
+ __ testptr(len_reg, 8);
+ __ jcc(Assembler::zero, L_processTail_4_insr[k]);
+ __ subptr(pos,8);
+ __ pinsrd(xmm_from0, Address(from, pos), 0); // 8-byte piece inserted as two dwords
+ __ pinsrd(xmm_from0, Address(from, pos, Address::times_1, 4), 1);
+ __ BIND(L_processTail_4_insr[k]);
+ __ testptr(len_reg, 4);
+ __ jcc(Assembler::zero, L_processTail_2_insr[k]);
+ __ subptr(pos,4);
+ __ pslldq(xmm_from0, 4); // shift already-gathered bytes up to make room at the low end
+ __ pinsrd(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_2_insr[k]);
+ __ testptr(len_reg, 2);
+ __ jcc(Assembler::zero, L_processTail_1_insr[k]);
+ __ subptr(pos, 2);
+ __ pslldq(xmm_from0, 2);
+ __ pinsrw(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_1_insr[k]);
+ __ testptr(len_reg, 1);
+ __ jcc(Assembler::zero, L_processTail_exit_insr[k]);
+ __ subptr(pos, 1);
+ __ pslldq(xmm_from0, 1);
+ __ pinsrb(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_exit_insr[k]); // pos is back at the start of the tail
+
+ __ movptr(saved_encCounter_start, saved_counter_param); // reuses the key register; the key is no longer needed on this path
+ __ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // save the encrypted counter so a later call can use its remaining bytes
+ __ pxor(xmm_result0, xmm_from0);
+
+ __ testptr(len_reg, 8); // write the result out low bytes first, in 8/4/2/1-byte pieces
+ __ jcc(Assembler::zero, L_processTail_4_extr[k]);
+ __ pextrd(Address(to, pos), xmm_result0, 0);
+ __ pextrd(Address(to, pos, Address::times_1, 4), xmm_result0, 1);
+ __ psrldq(xmm_result0, 8); // drop the bytes just written
+ __ addptr(pos, 8);
+ __ BIND(L_processTail_4_extr[k]);
+ __ testptr(len_reg, 4);
+ __ jcc(Assembler::zero, L_processTail_2_extr[k]);
+ __ pextrd(Address(to, pos), xmm_result0, 0);
+ __ psrldq(xmm_result0, 4);
+ __ addptr(pos, 4);
+ __ BIND(L_processTail_2_extr[k]);
+ __ testptr(len_reg, 2);
+ __ jcc(Assembler::zero, L_processTail_1_extr[k]);
+ __ pextrb(Address(to, pos), xmm_result0, 0); // the 2-byte piece is stored as two single bytes here
+ __ pextrb(Address(to, pos, Address::times_1, 1), xmm_result0, 1);
+ __ psrldq(xmm_result0, 2);
+ __ addptr(pos, 2);
+ __ BIND(L_processTail_1_extr[k]);
+ __ testptr(len_reg, 1);
+ __ jcc(Assembler::zero, L_processTail_exit_extr[k]);
+ __ pextrb(Address(to, pos), xmm_result0, 0);
+
+ __ BIND(L_processTail_exit_extr[k]);
+ __ movptr(used_addr, used_addr_param);
+ __ movl(Address(used_addr, 0), len_reg); // record how many bytes of the saved encrypted counter were consumed
+ __ jmp(L_exit);
+ }
+
+ __ BIND(L_exit);
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
+ __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
+ handleSOERegisters(false /*restoring*/);
+ __ movptr(rax, len_param); // return length
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+
+ __ BIND (L_key192_top);
+ __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
+ __ jmp(L_multiBlock_loopTop[1]); //key192
+
+ __ BIND (L_key256_top);
+ __ movptr(pos, 0); // init pos before L_multiBlock_loopTop
+ __ jmp(L_multiBlock_loopTop[2]); //key256
+
+ return start;
+ }
+
+
// byte swap x86 long
address generate_ghash_long_swap_mask() {
__ align(CodeEntryAlignment);
@@ -3181,6 +3520,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
}
+ if (UseAESCTRIntrinsics) {
+ StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
+ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
+ }
+
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index c5811b28b..254f63392 100644
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -3010,6 +3010,15 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address generate_counter_shuffle_mask() { // emits the 16-byte pshufb mask used on the CTR counter; returns its address
+ __ align(16);
+ StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
+ address start = __ pc();
+ __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); // in little-endian memory order the two quadwords give mask bytes 0f,0e,...,00,
+ __ emit_data64(0x0001020304050607, relocInfo::none); // i.e. a pshufb with this mask reverses all 16 bytes of the register
+ return start;
+ }
+
// Utility routine for loading a 128-bit key word in little endian format
// can optionally specify that the shuffle mask is already in an xmmregister
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
@@ -3021,6 +3030,18 @@ class StubGenerator: public StubCodeGenerator {
}
}
+ // Utility routine: add inc_delta to the 128-bit counter (IV in CTR mode) held in xmmdst; reg is scratch, next_block is bound at the end
+ void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) {
+ __ pextrq(reg, xmmdst, 0x0); // low quadword -> reg
+ __ addq(reg, inc_delta); // sets the carry flag on 64-bit overflow
+ __ pinsrq(xmmdst, reg, 0x0); // write it back; the jcc below relies on the carry flag from addq surviving this
+ __ jcc(Assembler::carryClear, next_block); // jump if no carry
+ __ pextrq(reg, xmmdst, 0x01); // Carry
+ __ addq(reg, 0x01); // propagate the carry into the high quadword
+ __ pinsrq(xmmdst, reg, 0x01); //Carry end
+ __ BIND(next_block); // next instruction
+ }
+
// Arguments:
//
// Inputs:
@@ -3639,6 +3660,320 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
+ // to hide instruction latency
+ //
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - source byte array address
+ // c_rarg1 - destination byte array address
+ // c_rarg2 - K (key) in little endian int array
+ // c_rarg3 - counter vector byte array address
+ // Linux (any build without _WIN64)
+ // c_rarg4 - input length
+ // c_rarg5 - saved encryptedCounter start
+ // rbp + 2 * wordSize - address of saved used length (this is what used_mem below reads)
+ // Windows
+ // rbp + 6 * wordSize - input length
+ // rbp + 7 * wordSize - saved encryptedCounter start
+ // rbp + 8 * wordSize - address of saved used length
+ //
+ // Output:
+ // rax - input length
+ //
+ address generate_counterMode_AESCrypt_Parallel() {
+ assert(UseAES, "need AES instructions and misaligned SSE support");
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");
+ address start = __ pc();
+ const Register from = c_rarg0; // source array address
+ const Register to = c_rarg1; // destination array address
+ const Register key = c_rarg2; // key array address
+ const Register counter = c_rarg3; // counter byte array initialized from counter array address
+ // and left with the results of the last encryption block
+#ifndef _WIN64
+ const Register len_reg = c_rarg4;
+ const Register saved_encCounter_start = c_rarg5;
+ const Register used_addr = r10;
+ const Address used_mem(rbp, 2 * wordSize); // stack slot holding the address of the 'used' count
+ const Register used = r11;
+#else
+ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
+ const Address saved_encCounter_mem(rbp, 7 * wordSize); // saved encryptedCounter start is on stack on Win64
+ const Address used_mem(rbp, 8 * wordSize); // address of the saved used length is on stack on Win64
+ const Register len_reg = r10; // pick the first volatile windows register
+ const Register saved_encCounter_start = r11;
+ const Register used_addr = r13; // r13/r14 are saved below and restored at L_exit
+ const Register used = r14;
+#endif
+ const Register pos = rax; // byte offset into from/to
+
+ const int PARALLEL_FACTOR = 6;
+ const XMMRegister xmm_counter_shuf_mask = xmm0;
+ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
+ const XMMRegister xmm_curr_counter = xmm2;
+
+ const XMMRegister xmm_key_tmp0 = xmm3;
+ const XMMRegister xmm_key_tmp1 = xmm4;
+
+ // registers holding the six results in the parallelized loop
+ const XMMRegister xmm_result0 = xmm5;
+ const XMMRegister xmm_result1 = xmm6;
+ const XMMRegister xmm_result2 = xmm7;
+ const XMMRegister xmm_result3 = xmm8;
+ const XMMRegister xmm_result4 = xmm9;
+ const XMMRegister xmm_result5 = xmm10;
+
+ const XMMRegister xmm_from0 = xmm11;
+ const XMMRegister xmm_from1 = xmm12;
+ const XMMRegister xmm_from2 = xmm13;
+ const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64.
+ const XMMRegister xmm_from4 = xmm3; //reuse xmm3~4. Because xmm_key_tmp0~1 are useless when loading input text
+ const XMMRegister xmm_from5 = xmm4;
+
+ //for key_128, key_192, key_256
+ const int rounds[3] = {10, 12, 14};
+ Label L_exit_preLoop, L_preLoop_start;
+ Label L_multiBlock_loopTop[3];
+ Label L_singleBlockLoopTop[3];
+ Label L__incCounter[3][6]; //for 6 blocks
+ Label L__incCounter_single[3]; //for single block, key128, key192, key256
+ Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3];
+ Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; // NOTE(review): L_processTail_extr is not referenced below
+
+ Label L_exit;
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+ // save the xmm registers which must be preserved 6-14
+ const int XMM_REG_NUM_KEY_LAST = 14;
+ __ subptr(rsp, -rsp_after_call_off * wordSize);
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(xmm_save(i), as_XMMRegister(i));
+ }
+
+ const Address r13_save(rbp, rdi_off * wordSize); // r13/r14 are stashed in the frame slots named by rdi_off/rsi_off
+ const Address r14_save(rbp, rsi_off * wordSize);
+
+ __ movptr(r13_save, r13);
+ __ movptr(r14_save, r14);
+
+ // on win64, fill len_reg from stack position
+ __ movl(len_reg, len_mem);
+ __ movptr(saved_encCounter_start, saved_encCounter_mem);
+ __ movptr(used_addr, used_mem);
+ __ movl(used, Address(used_addr, 0)); // load the current 'used' count
+#else
+ __ push(len_reg); // Save the original length; it is popped into rax as the return value at L_exit
+ __ movptr(used_addr, used_mem);
+ __ movl(used, Address(used_addr, 0)); // load the current 'used' count
+#endif
+
+ __ push(rbx); // Save RBX
+ __ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
+ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
+ __ movptr(pos, 0);
+
+ // Use the partially used encrypted counter from last invocation
+ __ BIND(L_preLoop_start);
+ __ cmpptr(used, 16); // stop once 'used' reaches 16
+ __ jcc(Assembler::aboveEqual, L_exit_preLoop);
+ __ cmpptr(len_reg, 0); // ... or once no input is left
+ __ jcc(Assembler::lessEqual, L_exit_preLoop);
+ __ movb(rbx, Address(saved_encCounter_start, used)); // next unused byte of the saved encrypted counter (rbx is scratch)
+ __ xorb(rbx, Address(from, pos)); // XOR with the next input byte
+ __ movb(Address(to, pos), rbx); // store the output byte
+ __ addptr(pos, 1);
+ __ addptr(used, 1);
+ __ subptr(len_reg, 1);
+
+ __ jmp(L_preLoop_start);
+
+ __ BIND(L_exit_preLoop);
+ __ movl(Address(used_addr, 0), used); // write the updated 'used' count back
+
+ // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
+ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+ __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+ __ cmpl(rbx, 52);
+ __ jcc(Assembler::equal, L_multiBlock_loopTop[1]); // key192
+ __ cmpl(rbx, 60);
+ __ jcc(Assembler::equal, L_multiBlock_loopTop[2]); // key256; otherwise fall through into the key128 code (k == 0)
+
+#define CTR_DoSix(opc, src_reg) \
+ __ opc(xmm_result0, src_reg); \
+ __ opc(xmm_result1, src_reg); \
+ __ opc(xmm_result2, src_reg); \
+ __ opc(xmm_result3, src_reg); \
+ __ opc(xmm_result4, src_reg); \
+ __ opc(xmm_result5, src_reg);
+
+ // k == 0 : generate code for key_128
+ // k == 1 : generate code for key_192
+ // k == 2 : generate code for key_256
+ for (int k = 0; k < 3; ++k) {
+ //multi blocks starts here
+ __ align(OptoLoopAlignment);
+ __ BIND(L_multiBlock_loopTop[k]);
+ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
+ __ jcc(Assembler::less, L_singleBlockLoopTop[k]);
+ load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); // round 0 key
+
+ //load, then increase counters
+ CTR_DoSix(movdqa, xmm_curr_counter); // all six results start as the current counter
+ inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]); // result1..5 = counter+1..+5; result0 stays counter+0
+ inc_counter(rbx, xmm_result2, 0x02, L__incCounter[k][1]);
+ inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]);
+ inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]);
+ inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]);
+ inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]); // advance the running counter for the next iteration
+ CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR
+ CTR_DoSix(pxor, xmm_key_tmp0); //PXOR with Round 0 key
+
+ //load two ROUND_KEYs at a time
+ for (int i = 1; i < rounds[k]; ) { // i advances by two per pass; the second key of the last pair is the final round key
+ load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask);
+ load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask);
+ CTR_DoSix(aesenc, xmm_key_tmp1);
+ i++;
+ if (i != rounds[k]) {
+ CTR_DoSix(aesenc, xmm_key_tmp0);
+ } else {
+ CTR_DoSix(aesenclast, xmm_key_tmp0);
+ }
+ i++;
+ }
+
+ // get next PARALLEL_FACTOR blocks into xmm_from registers
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
+ __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize));
+ __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize));
+ __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize));
+ __ movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize));
+ __ movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize));
+
+ __ pxor(xmm_result0, xmm_from0);
+ __ pxor(xmm_result1, xmm_from1);
+ __ pxor(xmm_result2, xmm_from2);
+ __ pxor(xmm_result3, xmm_from3);
+ __ pxor(xmm_result4, xmm_from4);
+ __ pxor(xmm_result5, xmm_from5);
+
+ // store the 6 results into the next PARALLEL_FACTOR * AESBlockSize bytes of output
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
+ __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1);
+ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2);
+ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3);
+ __ movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4);
+ __ movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5);
+
+ __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text
+ __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length
+ __ jmp(L_multiBlock_loopTop[k]);
+
+ // singleBlock starts here
+ __ align(OptoLoopAlignment);
+ __ BIND(L_singleBlockLoopTop[k]);
+ __ cmpptr(len_reg, 0);
+ __ jcc(Assembler::lessEqual, L_exit); // nothing left: done
+ load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask);
+ __ movdqa(xmm_result0, xmm_curr_counter); // this block uses the current counter value
+ inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]);
+ __ pshufb(xmm_result0, xmm_counter_shuf_mask); // shuffle the counter copy back before encrypting it
+ __ pxor(xmm_result0, xmm_key_tmp0); // round 0
+ for (int i = 1; i < rounds[k]; i++) {
+ load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask);
+ __ aesenc(xmm_result0, xmm_key_tmp0);
+ }
+ load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask);
+ __ aesenclast(xmm_result0, xmm_key_tmp0); // xmm_result0 now holds the encrypted counter
+ __ cmpptr(len_reg, AESBlockSize);
+ __ jcc(Assembler::less, L_processTail_insr[k]); // less than one full block left: handle the partial tail
+ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize));
+ __ pxor(xmm_result0, xmm_from0);
+ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0);
+ __ addptr(pos, AESBlockSize);
+ __ subptr(len_reg, AESBlockSize);
+ __ jmp(L_singleBlockLoopTop[k]);
+ __ BIND(L_processTail_insr[k]); // gather the remaining len_reg input bytes into xmm_from0, walking backwards from the end
+ __ addptr(pos, len_reg); // pos now points just past the last input byte
+ __ testptr(len_reg, 8);
+ __ jcc(Assembler::zero, L_processTail_4_insr[k]);
+ __ subptr(pos,8);
+ __ pinsrq(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_4_insr[k]);
+ __ testptr(len_reg, 4);
+ __ jcc(Assembler::zero, L_processTail_2_insr[k]);
+ __ subptr(pos,4);
+ __ pslldq(xmm_from0, 4); // shift already-gathered bytes up to make room at the low end
+ __ pinsrd(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_2_insr[k]);
+ __ testptr(len_reg, 2);
+ __ jcc(Assembler::zero, L_processTail_1_insr[k]);
+ __ subptr(pos, 2);
+ __ pslldq(xmm_from0, 2);
+ __ pinsrw(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_1_insr[k]);
+ __ testptr(len_reg, 1);
+ __ jcc(Assembler::zero, L_processTail_exit_insr[k]);
+ __ subptr(pos, 1);
+ __ pslldq(xmm_from0, 1);
+ __ pinsrb(xmm_from0, Address(from, pos), 0);
+ __ BIND(L_processTail_exit_insr[k]); // pos is back at the start of the tail
+
+ __ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // save the encrypted counter so a later call can use its remaining bytes
+ __ pxor(xmm_result0, xmm_from0);
+
+ __ testptr(len_reg, 8); // write the result out low bytes first, in 8/4/2/1-byte pieces
+ __ jcc(Assembler::zero, L_processTail_4_extr[k]);
+ __ pextrq(Address(to, pos), xmm_result0, 0);
+ __ psrldq(xmm_result0, 8); // drop the bytes just written
+ __ addptr(pos, 8);
+ __ BIND(L_processTail_4_extr[k]);
+ __ testptr(len_reg, 4);
+ __ jcc(Assembler::zero, L_processTail_2_extr[k]);
+ __ pextrd(Address(to, pos), xmm_result0, 0);
+ __ psrldq(xmm_result0, 4);
+ __ addptr(pos, 4);
+ __ BIND(L_processTail_2_extr[k]);
+ __ testptr(len_reg, 2);
+ __ jcc(Assembler::zero, L_processTail_1_extr[k]);
+ __ pextrw(Address(to, pos), xmm_result0, 0);
+ __ psrldq(xmm_result0, 2);
+ __ addptr(pos, 2);
+ __ BIND(L_processTail_1_extr[k]);
+ __ testptr(len_reg, 1);
+ __ jcc(Assembler::zero, L_processTail_exit_extr[k]);
+ __ pextrb(Address(to, pos), xmm_result0, 0);
+
+ __ BIND(L_processTail_exit_extr[k]);
+ __ movl(Address(used_addr, 0), len_reg); // record how many bytes of the saved encrypted counter were consumed
+ __ jmp(L_exit);
+
+ }
+
+ __ BIND(L_exit);
+ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
+ __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
+ __ pop(rbx); // pop the saved RBX.
+#ifdef _WIN64
+ // restore regs belonging to calling function
+ for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+ __ movdqu(as_XMMRegister(i), xmm_save(i));
+ }
+ __ movl(rax, len_mem); // return the input length
+ __ movptr(r13, r13_save);
+ __ movptr(r14, r14_save);
+#else
+ __ pop(rax); // return 'len'
+#endif
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+ return start;
+ }
// byte swap x86 long
address generate_ghash_long_swap_mask() {
@@ -4239,12 +4574,15 @@ class StubGenerator: public StubCodeGenerator {
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
-
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
+ if (UseAESCTRIntrinsics){
+ StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
+ StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
+ }
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp
index 9b0d8fc75..617879377 100644
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp
@@ -33,6 +33,7 @@
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp
index bb160486c..70b5a34ac 100644
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp
@@ -33,6 +33,10 @@
static address _verify_mxcsr_entry;
// shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
static address _key_shuffle_mask_addr;
+
+ //shuffle mask for big-endian 128-bit integers
+ static address _counter_shuffle_mask_addr;
+
// masks and table for CRC32
static uint64_t _crc_by128_masks[];
static juint _crc_table[];
@@ -43,6 +47,7 @@
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
+ static address counter_shuffle_mask_addr() { return _counter_shuffle_mask_addr; }
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp
index bca5d493c..538f83e69 100644
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_32.hpp
@@ -31,7 +31,7 @@
enum platform_dependent_constants {
code_size1 = 9000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 25800 // simply increase if too small (assembler will crash if too small)
};
class x86 {
diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp
index b048fd74e..f963cd2f8 100644
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp
@@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _
enum platform_dependent_constants {
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
};
class x86 {
diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
index 46b3e32ea..ce3037d76 100644
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
@@ -573,6 +573,28 @@ void VM_Version::get_processor_features() {
}
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
+
+ // --AES-CTR begins--
+ if (!UseAESIntrinsics) {
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+ } else {
+ if(supports_sse4_1() && UseSSE >= 4) {
+ if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
+ }
+ } else {
+ // The AES-CTR intrinsic stubs require AES instruction support (of course)
+ // but also require SSE4.1 mode or higher for the instructions they use.
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
+ }
+ // --AES-CTR ends--
}
} else if (UseAES || UseAESIntrinsics) {
if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
@@ -583,6 +605,10 @@ void VM_Version::get_processor_features() {
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
+ if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
+ warning("AES-CTR intrinsics are not available on this CPU");
+ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+ }
}
// Use CLMUL instructions if available.
@@ -606,6 +632,16 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
+  // The AES-CTR stub needs SSE4.1 (see the AES-CTR block above), so only
+  // default-enable the intrinsic when that requirement is met as well.
+  if (UseAESIntrinsics && supports_sse4_1() && UseSSE >= 4) {
+    if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) UseAESCTRIntrinsics = true;
+  } else if (UseAESCTRIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics))
+      warning("AES/CTR intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+  }
+
// GHASH/GCM intrinsics
if (UseCLMUL && (UseSSE > 2)) {
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
index 942d172a1..4ca2a3ad4 100644
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@@ -846,6 +846,10 @@
do_name( decrypt_name, "implDecrypt") \
do_signature(byteArray_int_int_byteArray_int_signature, "([BII[BI)I") \
\
+ do_class(com_sun_crypto_provider_counterMode, "com/sun/crypto/provider/CounterMode") \
+ do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
+ do_name( crypt_name, "implCrypt") \
+ \
/* support for sun.security.provider.SHA */ \
do_class(sun_security_provider_sha, "sun/security/provider/SHA") \
do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R) \
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
index 6f8ffe608..a0e497f08 100644
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@@ -952,6 +952,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp
index bb721f6f1..2add82dd1 100644
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@@ -196,6 +196,7 @@ class LibraryCallKit : public GraphKit {
return generate_method_call(method_id, true, false);
}
Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
+ Node * field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls);
Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
Node* make_string_method_node(int opcode, Node* str1, Node* str2);
@@ -309,7 +310,9 @@ class LibraryCallKit : public GraphKit {
bool inline_reference_get();
bool inline_aescrypt_Block(vmIntrinsics::ID id);
bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
+ bool inline_counterMode_AESCrypt(vmIntrinsics::ID id);
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
+ Node* inline_counterMode_AESCrypt_predicate();
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
bool inline_ghash_processBlocks();
@@ -558,6 +561,13 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
predicates = 1;
break;
+ case vmIntrinsics::_counterMode_AESCrypt:
+ if (!UseAESCTRIntrinsics) {
+ return NULL;
+ }
+ predicates = 1;
+ break;
+
case vmIntrinsics::_sha_implCompress:
if (!UseSHA1Intrinsics) return NULL;
break;
@@ -950,6 +960,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
+ case vmIntrinsics::_counterMode_AESCrypt:
+ return inline_counterMode_AESCrypt(intrinsic_id());
+
case vmIntrinsics::_sha_implCompress:
case vmIntrinsics::_sha2_implCompress:
case vmIntrinsics::_sha5_implCompress:
@@ -1021,6 +1034,8 @@ Node* LibraryCallKit::try_to_predicate(int predicate) {
return inline_cipherBlockChaining_AESCrypt_predicate(false);
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
return inline_cipherBlockChaining_AESCrypt_predicate(true);
+ case vmIntrinsics::_counterMode_AESCrypt:
+ return inline_counterMode_AESCrypt_predicate();
case vmIntrinsics::_digestBase_implCompressMB:
return inline_digestBase_implCompressMB_predicate(predicate);
@@ -6581,6 +6596,39 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * field
return loadedField;
}
+Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
+ bool is_exact = true, bool is_static = false,
+ ciInstanceKlass * fromKls = NULL) {
+ if (fromKls == NULL) {
+ const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
+ assert(tinst != NULL, "obj is null");
+ assert(tinst->klass()->is_loaded(), "obj is not loaded");
+ assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
+ fromKls = tinst->klass()->as_instance_klass();
+ }
+ else {
+ assert(is_static, "only for static field access");
+ }
+ ciField* field = fromKls->get_field_by_name(ciSymbol::make(fieldName),
+ ciSymbol::make(fieldTypeString),
+ is_static);
+
+ assert(field != NULL, "undefined field");
+ assert(!field->is_volatile(), "not defined for volatile fields");
+ // For a static field the base object is the java mirror of the holder class.
+ if (is_static) {
+ const TypeInstPtr* tip = TypeInstPtr::make(fromKls->java_mirror());
+ fromObj = makecon(tip);
+ }
+
+ // Only the field's address (base + offset_in_bytes) is computed and returned;
+ // no load node is created here.
+ // Address computation modeled on Parse::do_get_xxx().
+ int offset = field->offset_in_bytes();
+ Node *adr = basic_plus_adr(fromObj, fromObj, offset);
+
+ return adr;
+}
//------------------------------inline_aescrypt_Block-----------------------
bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
@@ -6747,6 +6795,90 @@ bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
return true;
}
+//------------------------------inline_counterMode_AESCrypt-----------------------
+bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
+ assert(UseAES, "need AES instruction support");
+ if (!UseAESCTRIntrinsics) return false;
+
+ address stubAddr = NULL;
+ const char *stubName = NULL;
+ if (id == vmIntrinsics::_counterMode_AESCrypt) {
+ stubAddr = StubRoutines::counterMode_AESCrypt();
+ stubName = "counterMode_AESCrypt";
+ }
+ if (stubAddr == NULL) return false;
+ // argument(0) is the CounterMode receiver; the rest follow the ([BII[BI)I signature of implCrypt.
+ Node* counterMode_object = argument(0);
+ Node* src = argument(1);
+ Node* src_offset = argument(2);
+ Node* len = argument(3);
+ Node* dest = argument(4);
+ Node* dest_offset = argument(5);
+
+ // (1) src and dest are arrays.
+ const Type* src_type = src->Value(&_gvn);
+ const Type* dest_type = dest->Value(&_gvn);
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
+ const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+ assert(top_src != NULL && top_src->klass() != NULL &&
+ top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+ // checks are the responsibility of the caller
+ Node* src_start = src;
+ Node* dest_start = dest;
+ if (src_offset != NULL || dest_offset != NULL) {
+ assert(src_offset != NULL && dest_offset != NULL, "");
+ src_start = array_element_address(src, src_offset, T_BYTE);
+ dest_start = array_element_address(dest, dest_offset, T_BYTE);
+ }
+
+ // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
+ // (because of the predicated logic executed earlier).
+ // so we cast it here safely.
+ // if a field or the expanded key array cannot be found below (NULL result), we return false and revert to java
+ Node* embeddedCipherObj = load_field_from_object(counterMode_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+ if (embeddedCipherObj == NULL) return false;
+ // cast it to what we know it will be at runtime
+ const TypeInstPtr* tinst = _gvn.type(counterMode_object)->isa_instptr();
+ assert(tinst != NULL, "CTR obj is null");
+ assert(tinst->klass()->is_loaded(), "CTR obj is not loaded");
+ ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+ assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
+ ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+ const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
+ const TypeOopPtr* xtype = aklass->as_instance_type();
+ Node* aescrypt_object = new (C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
+ aescrypt_object = _gvn.transform(aescrypt_object);
+ // we need to get the start of the aescrypt_object's expanded key array
+ Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+ if (k_start == NULL) return false;
+ // similarly, get the start address of the counter array
+ Node* obj_counter = load_field_from_object(counterMode_object, "counter", "[B", /*is_exact*/ false);
+ if (obj_counter == NULL) return false;
+ Node* cnt_start = array_element_address(obj_counter, intcon(0), T_BYTE);
+ // likewise the start of the encryptedCounter array, plus the address of the int field "used"
+ Node* saved_encCounter = load_field_from_object(counterMode_object, "encryptedCounter", "[B", /*is_exact*/ false);
+ if (saved_encCounter == NULL) return false;
+ Node* saved_encCounter_start = array_element_address(saved_encCounter, intcon(0), T_BYTE);
+ Node* used = field_address_from_object(counterMode_object, "used", "I", /*is_exact*/ false);
+
+ Node* ctrCrypt;
+ if (Matcher::pass_original_key_for_aes()) {
+ // no SPARC version for AES/CTR intrinsics now.
+ return false;
+ }
+ // Call the stub, passing src_start, dest_start, k_start, cnt_start, len, saved_encCounter_start and used
+ ctrCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::counterMode_aescrypt_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ src_start, dest_start, k_start, cnt_start, len, saved_encCounter_start, used);
+
+ // return cipher length (int)
+ Node* retvalue = _gvn.transform(new (C) ProjNode(ctrCrypt, TypeFunc::Parms));
+ set_result(retvalue);
+ return true;
+}
+
//------------------------------get_key_start_from_aescrypt_object-----------------------
Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
#ifdef PPC64
@@ -6841,6 +6973,48 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt
return _gvn.transform(region);
}
+//----------------------------inline_counterMode_AESCrypt_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+// if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
+// Unlike the CBC predicate, this one takes no decrypting flag and performs no
+// cipher/plain buffer comparison: the instanceof test is the only check.
+// If the AESCrypt class is not loaded, the current control is returned as the
+// slow path and the fast path is set to top, so the intrinsic is never taken.
+//
+
+Node* LibraryCallKit::inline_counterMode_AESCrypt_predicate() {
+ // The receiver was checked for NULL already.
+ Node* objCTR = argument(0);
+
+ // Load embeddedCipher field of CounterMode object.
+ Node* embeddedCipherObj = load_field_from_object(objCTR, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+
+ // get AESCrypt klass for instanceOf check
+ // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
+ // will have same classloader as CounterMode object
+ const TypeInstPtr* tinst = _gvn.type(objCTR)->isa_instptr();
+ assert(tinst != NULL, "CTRobj is null");
+ assert(tinst->klass()->is_loaded(), "CTRobj is not loaded");
+
+ // we want to do an instanceof comparison against the AESCrypt class
+ ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+ if (!klass_AESCrypt->is_loaded()) {
+ // if AESCrypt is not even loaded, we never take the intrinsic fast path
+ Node* ctrl = control();
+ set_control(top()); // no regular fast path
+ return ctrl;
+ }
+
+ ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+ Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
+ Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
+ Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
+ Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
+
+ return instof_false; // even if it is NULL
+}
+
//------------------------------inline_ghash_processBlocks
bool LibraryCallKit::inline_ghash_processBlocks() {
address stubAddr;
diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp
index 0a86211ba..1c51be19b 100644
--- a/hotspot/src/share/vm/opto/runtime.cpp
+++ b/hotspot/src/share/vm/opto/runtime.cpp
@@ -1021,6 +1021,35 @@ const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
return TypeFunc::make(domain, range);
}
+//for counterMode calls of aescrypt encrypt/decrypt: six pointers and a length (plus the original key array when Matcher::pass_original_key_for_aes()), returning int
+const TypeFunc* OptoRuntime::counterMode_aescrypt_Type() {
+ // create input type (domain)
+ int num_args = 7;
+ if (Matcher::pass_original_key_for_aes()) {
+ num_args = 8;
+ }
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypePtr::NOTNULL; // src
+ fields[argp++] = TypePtr::NOTNULL; // dest
+ fields[argp++] = TypePtr::NOTNULL; // k array
+ fields[argp++] = TypePtr::NOTNULL; // counter array
+ fields[argp++] = TypeInt::INT; // src len
+ fields[argp++] = TypePtr::NOTNULL; // saved_encCounter
+ fields[argp++] = TypePtr::NOTNULL; // saved used addr
+ if (Matcher::pass_original_key_for_aes()) {
+ fields[argp++] = TypePtr::NOTNULL; // original k array
+ }
+ assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
+ // returning cipher len (int)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms + 0] = TypeInt::INT;
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
+ return TypeFunc::make(domain, range);
+}
+
/*
* void implCompress(byte[] buf, int ofs)
*/
diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp
index 47133d58c..f27e7d507 100644
--- a/hotspot/src/share/vm/opto/runtime.hpp
+++ b/hotspot/src/share/vm/opto/runtime.hpp
@@ -299,6 +299,7 @@ private:
static const TypeFunc* aescrypt_block_Type();
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+ static const TypeFunc* counterMode_aescrypt_Type();
static const TypeFunc* sha_implCompress_Type();
static const TypeFunc* digestBase_implCompressMB_Type();
diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp
index 65dfcf69b..91e52f033 100644
--- a/hotspot/src/share/vm/runtime/globals.hpp
+++ b/hotspot/src/share/vm/runtime/globals.hpp
@@ -734,6 +734,9 @@ class CommandLineFlags {
product(bool, UseAESIntrinsics, false, \
"Use intrinsics for AES versions of crypto") \
\
+ product(bool, UseAESCTRIntrinsics, false, \
+ "Use intrinsics for the paralleled version of AES/CTR crypto") \
+ \
product(bool, UseSHA1Intrinsics, false, \
"Use intrinsics for SHA-1 crypto hash function") \
\
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp
index f2106d13a..d66237137 100644
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp
@@ -124,6 +124,7 @@ address StubRoutines::_aescrypt_encryptBlock = NULL;
address StubRoutines::_aescrypt_decryptBlock = NULL;
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
+address StubRoutines::_counterMode_AESCrypt = NULL;
address StubRoutines::_ghash_processBlocks = NULL;
address StubRoutines::_sha1_implCompress = NULL;
diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp
index 16075d9f4..9fb589540 100644
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp
@@ -202,6 +202,7 @@ class StubRoutines: AllStatic {
static address _aescrypt_decryptBlock;
static address _cipherBlockChaining_encryptAESCrypt;
static address _cipherBlockChaining_decryptAESCrypt;
+ static address _counterMode_AESCrypt;
static address _ghash_processBlocks;
static address _sha1_implCompress;
@@ -370,6 +371,7 @@ class StubRoutines: AllStatic {
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
+ static address counterMode_AESCrypt() { return _counterMode_AESCrypt; }
static address ghash_processBlocks() { return _ghash_processBlocks; }
static address sha1_implCompress() { return _sha1_implCompress; }
diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp
index 3f2bfeb74..842b5840d 100644
--- a/hotspot/src/share/vm/runtime/vmStructs.cpp
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp
@@ -815,6 +815,7 @@ typedef TwoOopHashtable<Symbol*, mtClass> SymbolTwoOopHashtable;
static_field(StubRoutines, _aescrypt_decryptBlock, address) \
static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \
static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \
+ static_field(StubRoutines, _counterMode_AESCrypt, address) \
static_field(StubRoutines, _ghash_processBlocks, address) \
static_field(StubRoutines, _updateBytesCRC32, address) \
static_field(StubRoutines, _crc_table_adr, address) \
diff --git a/hotspot/test/compiler/7184394/TestAESBase.java b/hotspot/test/compiler/7184394/TestAESBase.java
index 5c3e6881e..afda2a1f7 100644
--- a/hotspot/test/compiler/7184394/TestAESBase.java
+++ b/hotspot/test/compiler/7184394/TestAESBase.java
@@ -106,8 +106,8 @@ abstract public class TestAESBase {
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
dCipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
- // CBC init
- if (mode.equals("CBC")) {
+ // CBC or CTR init
+ if (mode.equals("CBC") || mode.equals("CTR")) {
IvParameterSpec initVector = new IvParameterSpec(iv);
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
algParams = cipher.getParameters();
diff --git a/hotspot/test/compiler/7184394/TestAESMain.java b/hotspot/test/compiler/7184394/TestAESMain.java
index ddd8eeaef..65949420a 100644
--- a/hotspot/test/compiler/7184394/TestAESMain.java
+++ b/hotspot/test/compiler/7184394/TestAESMain.java
@@ -48,6 +48,13 @@
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
+ * @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
*
* @author Tom Deneau
*/
diff --git a/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java b/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java
index aea9336c9..c2bd38a71 100644
--- a/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java
+++ b/jdk/src/share/classes/com/sun/crypto/provider/CounterMode.java
@@ -39,10 +39,10 @@ import java.security.InvalidKeyException;
* @author Andreas Sterbenz
* @since 1.4.2
*/
-final class CounterMode extends FeedbackCipher {
+class CounterMode extends FeedbackCipher {
// current counter value
- private final byte[] counter;
+ final byte[] counter;
// encrypted bytes of the previous counter value
private final byte[] encryptedCounter;
@@ -137,7 +137,7 @@ final class CounterMode extends FeedbackCipher {
* <code>cipherOffset</code>.
*
* @param in the buffer with the input data to be encrypted
- * @param inOffset the offset in <code>plain</code>
+ * @param inOff the offset in <code>plain</code>
* @param len the length of the input data
* @param out the buffer for the result
* @param outOff the offset in <code>cipher</code>
@@ -176,6 +176,11 @@ final class CounterMode extends FeedbackCipher {
RangeUtil.nullAndBoundsCheck(in, inOff, len);
RangeUtil.nullAndBoundsCheck(out, outOff, len);
+ return implCrypt(in, inOff, len, out, outOff);
+ }
+
+ // Implementation of crypt() method. Possibly replaced with a compiler intrinsic.
+ private int implCrypt(byte[] in, int inOff, int len, byte[] out, int outOff) {
int result = len;
while (len-- > 0) {
if (used >= blockSize) {
diff --git a/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java b/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java
index f8a3eaa0a..6a394e448 100644
--- a/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java
+++ b/jdk/src/share/classes/com/sun/crypto/provider/GCTR.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,52 +29,43 @@
package com.sun.crypto.provider;
-import java.security.*;
-import javax.crypto.*;
+import javax.crypto.IllegalBlockSizeException;
import static com.sun.crypto.provider.AESConstants.AES_BLOCK_SIZE;
/**
* This class represents the GCTR function defined in NIST 800-38D
- * under section 6.5. It needs to be constructed w/ an initialized
- * cipher object, and initial counter block(ICB). Given an input X
- * of arbitrary length, it processes and returns an output which has
- * the same length as X. The invariants of this class are:
- *
- * (1) The length of intialCounterBlk (and also of its clones, e.g.,
- * fields counter and counterSave) is equal to AES_BLOCK_SIZE.
- *
- * (2) After construction, the field counter never becomes null, it
- * always contains a byte array of length AES_BLOCK_SIZE.
+ * under section 6.5. With a given cipher object and initial counter
+ * block, a counter mode operation is performed. Blocksize is limited
+ * to 16 bytes.
*
* If any invariant is broken, failures can occur because the
* AESCrypt.encryptBlock method can be intrinsified on the HotSpot VM
* (see JDK-8067648 for details).
*
+ * The counter mode operations can be intrinsified and parallelized
+ * by using CounterMode.implCrypt() if HotSpot VM supports it on the
+ * architecture.
+ *
* <p>This function is used in the implementation of GCM mode.
*
* @since 1.8
*/
-final class GCTR {
-
- // these fields should not change after the object has been constructed
- private final SymmetricCipher aes;
- private final byte[] icb;
-
- // the current counter value
- private byte[] counter;
+final class GCTR extends CounterMode {
- // needed for save/restore calls
- private byte[] counterSave = null;
-
- // NOTE: cipher should already be initialized
GCTR(SymmetricCipher cipher, byte[] initialCounterBlk) {
- this.aes = cipher;
+ super(cipher);
if (initialCounterBlk.length != AES_BLOCK_SIZE) {
throw new RuntimeException("length of initial counter block (" + initialCounterBlk.length +
") not equal to AES_BLOCK_SIZE (" + AES_BLOCK_SIZE + ")");
}
- this.icb = initialCounterBlk;
- this.counter = icb.clone();
+
+ iv = initialCounterBlk;
+ reset();
+ }
+
+ @Override
+ String getFeedback() {
+ return "GCTR";
}
// input must be multiples of 128-bit blocks when calling update
@@ -89,23 +80,11 @@ final class GCTR {
throw new RuntimeException("output buffer too small");
}
- byte[] encryptedCntr = new byte[AES_BLOCK_SIZE];
-
- int numOfCompleteBlocks = inLen / AES_BLOCK_SIZE;
- for (int i = 0; i < numOfCompleteBlocks; i++) {
- aes.encryptBlock(counter, 0, encryptedCntr, 0);
- for (int n = 0; n < AES_BLOCK_SIZE; n++) {
- int index = (i * AES_BLOCK_SIZE + n);
- out[outOfs + index] =
- (byte) ((in[inOfs + index] ^ encryptedCntr[n]));
- }
- GaloisCounterMode.increment32(counter);
- }
- return inLen;
+ return encrypt(in, inOfs, inLen, out, outOfs);
}
// input can be arbitrary size when calling doFinal
- protected int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
+ int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
int outOfs) throws IllegalBlockSizeException {
try {
if (inLen < 0) {
@@ -118,7 +97,7 @@ final class GCTR {
if (lastBlockSize != 0) {
// do the last partial block
byte[] encryptedCntr = new byte[AES_BLOCK_SIZE];
- aes.encryptBlock(counter, 0, encryptedCntr, 0);
+ embeddedCipher.encryptBlock(counter, 0, encryptedCntr, 0);
for (int n = 0; n < lastBlockSize; n++) {
out[outOfs + completeBlkLen + n] =
(byte) ((in[inOfs + completeBlkLen + n] ^
@@ -131,28 +110,4 @@ final class GCTR {
}
return inLen;
}
-
- /**
- * Resets the content of this object to when it's first constructed.
- */
- void reset() {
- System.arraycopy(icb, 0, counter, 0, icb.length);
- counterSave = null;
- }
-
- /**
- * Save the current content of this object.
- */
- void save() {
- this.counterSave = this.counter.clone();
- }
-
- /**
- * Restores the content of this object to the previous saved one.
- */
- void restore() {
- if (this.counterSave != null) {
- this.counter = this.counterSave;
- }
- }
}
diff --git a/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java b/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java
index dc42e6bbf..78f0723d7 100644
--- a/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java
+++ b/jdk/src/share/classes/com/sun/crypto/provider/GHASH.java
@@ -122,10 +122,10 @@ final class GHASH {
}
- /* subkeyH and state are stored in long[] for GHASH intrinsic use */
+ /* subkeyHtbl and state are stored in long[] for GHASH intrinsic use */
- // hash subkey H; should not change after the object has been constructed
- private final long[] subkeyH;
+ // hashtable subkeyHtbl; holds 2*9 powers of subkeyH computed using carry-less multiplication
+ private long[] subkeyHtbl;
// buffer for storing hash
private final long[] state;
@@ -147,9 +147,9 @@ final class GHASH {
throw new ProviderException("Internal error");
}
state = new long[2];
- this.subkeyH = new long[2];
- this.subkeyH[0] = getLong(subkeyH, 0);
- this.subkeyH[1] = getLong(subkeyH, 8);
+ subkeyHtbl = new long[2*9];
+ subkeyHtbl[0] = getLong(subkeyH, 0);
+ subkeyHtbl[1] = getLong(subkeyH, 8);
}
/**
@@ -192,8 +192,8 @@ final class GHASH {
if (inLen == 0) {
return;
}
- ghashRangeCheck(in, inOfs, inLen, state, subkeyH);
- processBlocks(in, inOfs, inLen/AES_BLOCK_SIZE, state, subkeyH);
+ ghashRangeCheck(in, inOfs, inLen, state, subkeyHtbl);
+ processBlocks(in, inOfs, inLen/AES_BLOCK_SIZE, state, subkeyHtbl);
}
private static void ghashRangeCheck(byte[] in, int inOfs, int inLen, long[] st, long[] subH) {
@@ -217,8 +217,8 @@ final class GHASH {
throw new RuntimeException("internal state has invalid length: " +
st.length);
}
- if (subH.length != 2) {
- throw new RuntimeException("internal subkeyH has invalid length: " +
+ if (subH.length != 18) {
+ throw new RuntimeException("internal subkeyHtbl has invalid length: " +
subH.length);
}
}
diff --git a/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java b/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java
index ab93e3097..dd2618455 100644
--- a/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java
+++ b/jdk/src/share/classes/sun/security/ssl/SSLSocketImpl.java
@@ -439,6 +439,8 @@ public final class SSLSocketImpl
if (!conContext.isNegotiated) {
readHandshakeRecord();
}
+ } catch (InterruptedIOException iioe) {
+ handleException(iioe);
} catch (IOException ioe) {
throw conContext.fatal(Alert.HANDSHAKE_FAILURE,
"Couldn't kickstart handshaking", ioe);
@@ -1309,12 +1311,11 @@ public final class SSLSocketImpl
}
} catch (SSLException ssle) {
throw ssle;
+ } catch (InterruptedIOException iioe) {
+ // don't change exception in case of timeouts or interrupts
+ throw iioe;
} catch (IOException ioe) {
- if (!(ioe instanceof SSLException)) {
- throw new SSLException("readHandshakeRecord", ioe);
- } else {
- throw ioe;
- }
+ throw new SSLException("readHandshakeRecord", ioe);
}
}
@@ -1375,6 +1376,9 @@ public final class SSLSocketImpl
}
} catch (SSLException ssle) {
throw ssle;
+ } catch (InterruptedIOException iioe) {
+ // don't change exception in case of timeouts or interrupts
+ throw iioe;
} catch (IOException ioe) {
if (!(ioe instanceof SSLException)) {
throw new SSLException("readApplicationRecord", ioe);
diff --git a/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java b/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java
index 401822759..ab5712acc 100644
--- a/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java
+++ b/jdk/src/share/classes/sun/security/ssl/SSLSocketInputRecord.java
@@ -26,6 +26,7 @@
package sun.security.ssl;
import java.io.EOFException;
+import java.io.InterruptedIOException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -47,37 +48,31 @@ import sun.security.ssl.SSLCipher.SSLReadCipher;
final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
private InputStream is = null;
private OutputStream os = null;
- private final byte[] temporary = new byte[1024];
+ private final byte[] header = new byte[headerSize];
+ private int headerOff = 0;
+ // Cache for incomplete record body.
+ private ByteBuffer recordBody = ByteBuffer.allocate(1024);
private boolean formatVerified = false; // SSLv2 ruled out?
// Cache for incomplete handshake messages.
private ByteBuffer handshakeBuffer = null;
- private boolean hasHeader = false; // Had read the record header
-
SSLSocketInputRecord(HandshakeHash handshakeHash) {
super(handshakeHash, SSLReadCipher.nullTlsReadCipher());
}
@Override
int bytesInCompletePacket() throws IOException {
- if (!hasHeader) {
- // read exactly one record
- try {
- int really = read(is, temporary, 0, headerSize);
- if (really < 0) {
- // EOF: peer shut down incorrectly
- return -1;
- }
- } catch (EOFException eofe) {
- // The caller will handle EOF.
- return -1;
- }
- hasHeader = true;
+ // read header
+ try {
+ readHeader();
+ } catch (EOFException eofe) {
+ // The caller will handle EOF.
+ return -1;
}
- byte byteZero = temporary[0];
+ byte byteZero = header[0];
int len = 0;
/*
@@ -93,9 +88,9 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
* Last sanity check that it's not a wild record
*/
if (!ProtocolVersion.isNegotiable(
- temporary[1], temporary[2], false)) {
+ header[1], header[2], false)) {
throw new SSLException("Unrecognized record version " +
- ProtocolVersion.nameOf(temporary[1], temporary[2]) +
+ ProtocolVersion.nameOf(header[1], header[2]) +
" , plaintext connection?");
}
@@ -109,8 +104,8 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
/*
* One of the SSLv3/TLS message types.
*/
- len = ((temporary[3] & 0xFF) << 8) +
- (temporary[4] & 0xFF) + headerSize;
+ len = ((header[3] & 0xFF) << 8) +
+ (header[4] & 0xFF) + headerSize;
} else {
/*
* Must be SSLv2 or something unknown.
@@ -121,11 +116,11 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
*/
boolean isShort = ((byteZero & 0x80) != 0);
- if (isShort && ((temporary[2] == 1) || (temporary[2] == 4))) {
+ if (isShort && ((header[2] == 1) || (header[2] == 4))) {
if (!ProtocolVersion.isNegotiable(
- temporary[3], temporary[4], false)) {
+ header[3], header[4], false)) {
throw new SSLException("Unrecognized record version " +
- ProtocolVersion.nameOf(temporary[3], temporary[4]) +
+ ProtocolVersion.nameOf(header[3], header[4]) +
" , plaintext connection?");
}
@@ -138,9 +133,9 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
//
// int mask = (isShort ? 0x7F : 0x3F);
// len = ((byteZero & mask) << 8) +
- // (temporary[1] & 0xFF) + (isShort ? 2 : 3);
+ // (header[1] & 0xFF) + (isShort ? 2 : 3);
//
- len = ((byteZero & 0x7F) << 8) + (temporary[1] & 0xFF) + 2;
+ len = ((byteZero & 0x7F) << 8) + (header[1] & 0xFF) + 2;
} else {
// Gobblygook!
throw new SSLException(
@@ -160,34 +155,41 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
return null;
}
- if (!hasHeader) {
- // read exactly one record
- int really = read(is, temporary, 0, headerSize);
- if (really < 0) {
- throw new EOFException("SSL peer shut down incorrectly");
- }
- hasHeader = true;
- }
+ // read header
+ readHeader();
- Plaintext plaintext = null;
- if (!formatVerified) {
- formatVerified = true;
+ Plaintext[] plaintext = null;
+ boolean cleanInBuffer = true;
+ try {
+ if (!formatVerified) {
+ formatVerified = true;
- /*
- * The first record must either be a handshake record or an
- * alert message. If it's not, it is either invalid or an
- * SSLv2 message.
- */
- if ((temporary[0] != ContentType.HANDSHAKE.id) &&
- (temporary[0] != ContentType.ALERT.id)) {
- hasHeader = false;
- return handleUnknownRecord(temporary);
+ /*
+ * The first record must either be a handshake record or an
+ * alert message. If it's not, it is either invalid or an
+ * SSLv2 message.
+ */
+ if ((header[0] != ContentType.HANDSHAKE.id) &&
+ (header[0] != ContentType.ALERT.id)) {
+ plaintext = handleUnknownRecord();
+ }
}
- }
- // The record header should has consumed.
- hasHeader = false;
- return decodeInputRecord(temporary);
+ // The record header should have been consumed.
+ if (plaintext == null) {
+ plaintext = decodeInputRecord();
+ }
+ } catch(InterruptedIOException e) {
+ // do not clean header and recordBody in case of Socket Timeout
+ cleanInBuffer = false;
+ throw e;
+ } finally {
+ if (cleanInBuffer) {
+ headerOff = 0;
+ recordBody.clear();
+ }
+ }
+ return plaintext;
}
@Override
@@ -200,9 +202,7 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
this.os = outputStream;
}
- // Note that destination may be null
- private Plaintext[] decodeInputRecord(
- byte[] header) throws IOException, BadPaddingException {
+ private Plaintext[] decodeInputRecord() throws IOException, BadPaddingException {
byte contentType = header[0]; // pos: 0
byte majorVersion = header[1]; // pos: 1
byte minorVersion = header[2]; // pos: 2
@@ -227,30 +227,27 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
}
//
- // Read a complete record.
+ // Read a complete record and store in the recordBody
+ // recordBody caches the partially read record so that reading can resume
+ // after a read operation times out
//
- ByteBuffer destination = ByteBuffer.allocate(headerSize + contentLen);
- int dstPos = destination.position();
- destination.put(temporary, 0, headerSize);
- while (contentLen > 0) {
- int howmuch = Math.min(temporary.length, contentLen);
- int really = read(is, temporary, 0, howmuch);
- if (really < 0) {
- throw new EOFException("SSL peer shut down incorrectly");
+ if (recordBody.position() == 0) {
+ if (recordBody.capacity() < contentLen) {
+ recordBody = ByteBuffer.allocate(contentLen);
}
-
- destination.put(temporary, 0, howmuch);
- contentLen -= howmuch;
+ recordBody.limit(contentLen);
+ } else {
+ contentLen = recordBody.remaining();
}
- destination.flip();
- destination.position(dstPos + headerSize);
+ readFully(contentLen);
+ recordBody.flip();
if (SSLLogger.isOn && SSLLogger.isOn("record")) {
SSLLogger.fine(
"READ: " +
ProtocolVersion.nameOf(majorVersion, minorVersion) +
" " + ContentType.nameOf(contentType) + ", length = " +
- destination.remaining());
+ recordBody.remaining());
}
//
@@ -259,7 +256,7 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
ByteBuffer fragment;
try {
Plaintext plaintext =
- readCipher.decrypt(contentType, destination, null);
+ readCipher.decrypt(contentType, recordBody, null);
fragment = plaintext.fragment;
contentType = plaintext.contentType;
} catch (BadPaddingException bpe) {
@@ -368,8 +365,7 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
};
}
- private Plaintext[] handleUnknownRecord(
- byte[] header) throws IOException, BadPaddingException {
+ private Plaintext[] handleUnknownRecord() throws IOException, BadPaddingException {
byte firstByte = header[0];
byte thirdByte = header[2];
@@ -411,32 +407,29 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
}
int msgLen = ((header[0] & 0x7F) << 8) | (header[1] & 0xFF);
-
- ByteBuffer destination = ByteBuffer.allocate(headerSize + msgLen);
- destination.put(temporary, 0, headerSize);
- msgLen -= 3; // had read 3 bytes of content as header
- while (msgLen > 0) {
- int howmuch = Math.min(temporary.length, msgLen);
- int really = read(is, temporary, 0, howmuch);
- if (really < 0) {
- throw new EOFException("SSL peer shut down incorrectly");
+ if (recordBody.position() == 0) {
+ if (recordBody.capacity() < (headerSize + msgLen)) {
+ recordBody = ByteBuffer.allocate(headerSize + msgLen);
}
-
- destination.put(temporary, 0, howmuch);
- msgLen -= howmuch;
+ recordBody.limit(headerSize + msgLen);
+ recordBody.put(header, 0, headerSize);
+ } else {
+ msgLen = recordBody.remaining();
}
- destination.flip();
+ msgLen -= 3; // had read 3 bytes of content as header
+ readFully(msgLen);
+ recordBody.flip();
/*
* If we can map this into a V3 ClientHello, read and
* hash the rest of the V2 handshake, turn it into a
* V3 ClientHello message, and pass it up.
*/
- destination.position(2); // exclude the header
- handshakeHash.receive(destination);
- destination.position(0);
+ recordBody.position(2); // exclude the header
+ handshakeHash.receive(recordBody);
+ recordBody.position(0);
- ByteBuffer converted = convertToClientHello(destination);
+ ByteBuffer converted = convertToClientHello(recordBody);
if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
SSLLogger.fine(
@@ -456,28 +449,42 @@ final class SSLSocketInputRecord extends InputRecord implements SSLRecord {
}
}
- // Read the exact bytes of data, otherwise, return -1.
- private static int read(InputStream is,
- byte[] buffer, int offset, int len) throws IOException {
- int n = 0;
- while (n < len) {
- int readLen = is.read(buffer, offset + n, len - n);
- if (readLen < 0) {
- if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
- SSLLogger.fine("Raw read: EOF");
- }
- return -1;
+ // Read the exact bytes of data, otherwise, throw IOException.
+ private int readFully(int len) throws IOException {
+ int end = len + recordBody.position();
+ int off = recordBody.position();
+ try {
+ while (off < end) {
+ off += read(is, recordBody.array(), off, end - off);
}
+ } finally {
+ recordBody.position(off);
+ }
+ return len;
+ }
+
+ // Read SSL record header, otherwise, throw IOException.
+ private int readHeader() throws IOException {
+ while (headerOff < headerSize) {
+ headerOff += read(is, header, headerOff, headerSize - headerOff);
+ }
+ return headerSize;
+ }
+ private static int read(InputStream is, byte[] buf, int off, int len) throws IOException {
+ int readLen = is.read(buf, off, len);
+ if (readLen < 0) {
if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
- ByteBuffer bb = ByteBuffer.wrap(buffer, offset + n, readLen);
- SSLLogger.fine("Raw read", bb);
+ SSLLogger.fine("Raw read: EOF");
}
-
- n += readLen;
+ throw new EOFException("SSL peer shut down incorrectly");
}
- return n;
+ if (SSLLogger.isOn && SSLLogger.isOn("packet")) {
+ ByteBuffer bb = ByteBuffer.wrap(buf, off, readLen);
+ SSLLogger.fine("Raw read", bb);
+ }
+ return readLen;
}
// Try to use up the input stream without impact the performance too much.
diff --git a/jdk/src/share/classes/sun/security/ssl/SSLTransport.java b/jdk/src/share/classes/sun/security/ssl/SSLTransport.java
index b3d03b370..78e13ea2c 100644
--- a/jdk/src/share/classes/sun/security/ssl/SSLTransport.java
+++ b/jdk/src/share/classes/sun/security/ssl/SSLTransport.java
@@ -27,6 +27,7 @@ package sun.security.ssl;
import java.io.EOFException;
import java.io.IOException;
+import java.io.InterruptedIOException;
import java.nio.ByteBuffer;
import javax.crypto.AEADBadTagException;
import javax.crypto.BadPaddingException;
@@ -134,6 +135,9 @@ interface SSLTransport {
} catch (EOFException eofe) {
// rethrow EOFException, the call will handle it if neede.
throw eofe;
+ } catch (InterruptedIOException iioe) {
+ // don't close the Socket in case of timeouts or interrupts.
+ throw iioe;
} catch (IOException ioe) {
throw context.fatal(Alert.UNEXPECTED_MESSAGE, ioe);
}
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java
new file mode 100644
index 000000000..258672f59
--- /dev/null
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMBench.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.javax.crypto.full;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+
+import javax.crypto.Cipher;
+import javax.crypto.spec.GCMParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
+
+/**
+ * This performance test runs AES/GCM encryption and decryption using byte[]
+ * as input and output buffers for single and multi-part testing.
+ *
+ * This test rotates the IV and creates a new GCMParameterSpec for each encrypt
+ * benchmark operation
+ */
+
+public class AESGCMBench extends CryptoBase {
+
+ @Param({"128"})
+ private int keyLength;
+
+ @Param({"1024", "1500", "4096", "16384"})
+ private int dataSize;
+
+ byte[] encryptedData;
+ byte[] in, out;
+ private Cipher encryptCipher;
+ private Cipher decryptCipher;
+ SecretKeySpec ks;
+ GCMParameterSpec gcm_spec;
+ byte[] iv;
+
+ private static final int IV_BUFFER_SIZE = 32;
+ private static final int IV_MODULO = IV_BUFFER_SIZE - 16;
+ int iv_index = 0;
+ int updateLen = 0;
+
+ private int next_iv_index() {
+ int r = iv_index;
+ iv_index = (iv_index + 1) % IV_MODULO;
+ return r;
+ }
+
+ @Setup
+ public void setup() throws Exception {
+ setupProvider();
+
+ // Setup key material
+ byte[] keystring = fillSecureRandom(new byte[keyLength / 8]);
+ ks = new SecretKeySpec(keystring, "AES");
+ iv = fillSecureRandom(new byte[IV_BUFFER_SIZE]);
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
+
+ // Setup Cipher classes
+ encryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
+ decryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
+ encryptCipher.getParameters().
+ getParameterSpec(GCMParameterSpec.class));
+
+ // Setup input/output buffers
+ in = fillRandom(new byte[dataSize]);
+ encryptedData = new byte[encryptCipher.getOutputSize(in.length)];
+ out = new byte[encryptedData.length];
+ encryptCipher.doFinal(in, 0, in.length, encryptedData, 0);
+ updateLen = in.length / 2;
+
+ }
+
+ @Benchmark
+ public void encrypt() throws Exception {
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
+ encryptCipher.doFinal(in, 0, in.length, out, 0);
+ }
+
+ @Benchmark
+ public void encryptMultiPart() throws Exception {
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
+ int outOfs = encryptCipher.update(in, 0, updateLen, out, 0);
+ encryptCipher.doFinal(in, updateLen, in.length - updateLen,
+ out, outOfs);
+ }
+
+ @Benchmark
+ public void decrypt() throws Exception {
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
+ encryptCipher.getParameters().
+ getParameterSpec(GCMParameterSpec.class));
+ decryptCipher.doFinal(encryptedData, 0, encryptedData.length, out, 0);
+ }
+
+ @Benchmark
+ public void decryptMultiPart() throws Exception {
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
+ encryptCipher.getParameters().
+ getParameterSpec(GCMParameterSpec.class));
+ decryptCipher.update(encryptedData, 0, updateLen, out, 0);
+ decryptCipher.doFinal(encryptedData, updateLen,
+ encryptedData.length - updateLen, out, 0);
+ }
+}
\ No newline at end of file
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java
new file mode 100644
index 000000000..cb6d20c51
--- /dev/null
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/AESGCMByteBuffer.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.javax.crypto.full;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Setup;
+
+import javax.crypto.Cipher;
+import javax.crypto.spec.GCMParameterSpec;
+import javax.crypto.spec.SecretKeySpec;
+import java.nio.ByteBuffer;
+
+/**
+ * This performance test runs AES/GCM encryption and decryption using heap and
+ * direct ByteBuffers as input and output buffers for single and multi-part
+ * operations.
+ *
+ * This test rotates the IV and creates a new GCMParameterSpec for each encrypt
+ * benchmark operation
+ */
+
+public class AESGCMByteBuffer extends CryptoBase {
+
+ @Param({"128"})
+ private int keyLength;
+
+ @Param({"1024", "1500", "4096", "16384"})
+ private int dataSize;
+
+ @Param({"direct", "heap"})
+ private String dataMethod;
+
+ byte[] data;
+ ByteBuffer encryptedData;
+ ByteBuffer in, out;
+ private Cipher encryptCipher;
+ private Cipher decryptCipher;
+ SecretKeySpec ks;
+ GCMParameterSpec gcm_spec;
+ byte[] iv;
+
+ private static final int IV_BUFFER_SIZE = 32;
+ private static final int IV_MODULO = IV_BUFFER_SIZE - 16;
+ int iv_index = 0;
+ int updateLen = 0;
+
+ private int next_iv_index() {
+ int r = iv_index;
+ iv_index = (iv_index + 1) % IV_MODULO;
+ return r;
+ }
+
+ @Setup
+ public void setup() throws Exception {
+ setupProvider();
+
+ // Setup key material
+ byte[] keystring = fillSecureRandom(new byte[keyLength / 8]);
+ ks = new SecretKeySpec(keystring, "AES");
+ iv = fillSecureRandom(new byte[IV_BUFFER_SIZE]);
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
+
+ // Setup Cipher classes
+ encryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
+ decryptCipher = makeCipher(prov, "AES/GCM/NoPadding");
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
+ encryptCipher.getParameters().
+ getParameterSpec(GCMParameterSpec.class));
+
+ // Setup input/output buffers
+ data = fillRandom(new byte[dataSize]);
+ if (dataMethod.equalsIgnoreCase("direct")) {
+ in = ByteBuffer.allocateDirect(data.length);
+ in.put(data);
+ in.flip();
+ encryptedData = ByteBuffer.allocateDirect(
+ encryptCipher.getOutputSize(data.length));
+ out = ByteBuffer.allocateDirect(encryptedData.capacity());
+ } else if (dataMethod.equalsIgnoreCase("heap")) {
+ in = ByteBuffer.wrap(data);
+ encryptedData = ByteBuffer.allocate(
+ encryptCipher.getOutputSize(data.length));
+ out = ByteBuffer.allocate(encryptedData.capacity());
+ }
+
+ encryptCipher.doFinal(in, encryptedData);
+ encryptedData.flip();
+ in.flip();
+ updateLen = in.remaining() / 2;
+ }
+
+ @Benchmark
+ public void encrypt() throws Exception {
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
+ encryptCipher.doFinal(in, out);
+ out.flip();
+ in.flip();
+ }
+
+ @Benchmark
+ public void encryptMultiPart() throws Exception {
+ gcm_spec = new GCMParameterSpec(96, iv, next_iv_index(), 16);
+ encryptCipher.init(Cipher.ENCRYPT_MODE, ks, gcm_spec);
+ in.limit(updateLen);
+ encryptCipher.update(in, out);
+ in.limit(in.capacity());
+ encryptCipher.doFinal(in, out);
+ out.flip();
+ in.flip();
+ }
+
+ @Benchmark
+ public void decrypt() throws Exception {
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
+ encryptCipher.getParameters().
+ getParameterSpec(GCMParameterSpec.class));
+ decryptCipher.doFinal(encryptedData, out);
+ encryptedData.flip();
+ out.flip();
+ }
+
+ @Benchmark
+ public void decryptMultiPart() throws Exception {
+ decryptCipher.init(Cipher.DECRYPT_MODE, ks,
+ encryptCipher.getParameters().
+ getParameterSpec(GCMParameterSpec.class));
+
+ int len = encryptedData.remaining();
+ encryptedData.limit(updateLen);
+ decryptCipher.update(encryptedData, out);
+ encryptedData.limit(len);
+
+ decryptCipher.doFinal(encryptedData, out);
+ encryptedData.flip();
+ out.flip();
+ }
+
+}
\ No newline at end of file
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java
new file mode 100644
index 000000000..4af12703b
--- /dev/null
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/full/CryptoBase.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.javax.crypto.full;
+
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import javax.crypto.BadPaddingException;
+import javax.crypto.Cipher;
+import javax.crypto.IllegalBlockSizeException;
+import javax.crypto.NoSuchPaddingException;
+import java.security.NoSuchAlgorithmException;
+import java.security.Provider;
+import java.security.SecureRandom;
+import java.security.Security;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+
+@Fork(jvmArgsAppend = {"-XX:+AlwaysPreTouch"}, value = 5)
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 8, time = 2)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Thread)
+@BenchmarkMode(Mode.Throughput)
+public class CryptoBase {
+
+ @Param({""})
+ private String provider;
+
+ public Provider prov = null;
+
+ @Setup // JMH setup hook: resolves the optional "provider" parameter into prov
+ public void setupProvider() {
+ if (provider != null && !provider.isEmpty()) { // unset/empty leaves prov null, so makeCipher uses the default lookup
+ prov = Security.getProvider(provider);
+ if (prov == null) { // no provider is installed under the requested name
+ throw new RuntimeException("Can't find provider \"" + provider + "\"");
+ }
+ }
+ }
+
+ public static Cipher makeCipher(Provider prov, String algorithm) throws NoSuchPaddingException, NoSuchAlgorithmException {
+ return (prov == null) ? Cipher.getInstance(algorithm) : Cipher.getInstance(algorithm, prov);
+ }
+
+ public static byte[][] fillRandom(byte[][] data) {
+ Random rnd = new Random();
+ for (byte[] d : data) {
+ rnd.nextBytes(d);
+ }
+ return data;
+ }
+
+ public static byte[] fillRandom(byte[] data) {
+ Random rnd = new Random();
+ rnd.nextBytes(data);
+ return data;
+ }
+
+ public static byte[] fillSecureRandom(byte[] data) {
+ SecureRandom rnd = new SecureRandom();
+ rnd.nextBytes(data);
+ return data;
+ }
+
+ public static byte[][] fillEncrypted(byte[][] data, Cipher encryptCipher) throws BadPaddingException, IllegalBlockSizeException {
+ byte[][] encryptedData = new byte[data.length][];
+ for (int i = 0; i < encryptedData.length; i++) {
+ encryptedData[i] = encryptCipher.doFinal(data[i]);
+ }
+ return encryptedData;
+ }
+}
\ No newline at end of file
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java
new file mode 100644
index 000000000..a21b0c87f
--- /dev/null
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMBench.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.javax.crypto.small;
+
+import org.openjdk.jmh.annotations.Param;
+
+public class AESGCMBench extends
+ org.openjdk.bench.javax.crypto.full.AESGCMBench {
+
+ @Param({"128"})
+ private int keyLength;
+
+ @Param({"1024"})
+ private int dataSize;
+
+}
\ No newline at end of file
diff --git a/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java
new file mode 100644
index 000000000..2e389d300
--- /dev/null
+++ b/jdk/test/micro/org/openjdk/bench/javax/crypto/small/AESGCMByteBuffer.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.javax.crypto.small;
+
+import org.openjdk.jmh.annotations.Param;
+
+public class AESGCMByteBuffer extends
+ org.openjdk.bench.javax.crypto.full.AESGCMByteBuffer {
+
+ @Param({"128"})
+ private int keyLength;
+
+ @Param({"1024"})
+ private int dataSize;
+
+}
\ No newline at end of file
diff --git a/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java b/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java
index 3eb1d7b89..7678cc71f 100644
--- a/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java
+++ b/jdk/test/sun/security/ssl/SSLSocketImpl/ClientTimeout.java
@@ -26,8 +26,7 @@
/*
* @test
- * @bug 4836493
- * @ignore need further evaluation
+ * @bug 4836493 8239798
* @summary Socket timeouts for SSLSockets causes data corruption.
* @run main/othervm ClientTimeout
*/
diff --git a/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java b/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java
index 3e626a257..5578ea725 100644
--- a/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java
+++ b/jdk/test/sun/security/ssl/SSLSocketImpl/SSLExceptionForIOIssue.java
@@ -36,7 +36,7 @@
import javax.net.ssl.*;
import java.io.*;
-import java.net.InetAddress;
+import java.net.*;
public class SSLExceptionForIOIssue implements SSLContextTemplate {
@@ -139,7 +139,7 @@ public class SSLExceptionForIOIssue implements SSLContextTemplate {
} catch (SSLProtocolException | SSLHandshakeException sslhe) {
clientException = sslhe;
System.err.println("unexpected client exception: " + sslhe);
- } catch (SSLException ssle) {
+ } catch (SSLException | SocketTimeoutException ssle) {
// the expected exception, ignore it
System.err.println("expected client exception: " + ssle);
} catch (Exception e) {
--
2.17.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/a-xiang-and-shanhaijing/openjdk-1.8.0_2.git
[email protected]:a-xiang-and-shanhaijing/openjdk-1.8.0_2.git
a-xiang-and-shanhaijing
openjdk-1.8.0_2
电酱jdk8
master

搜索帮助