|
These sound like really lightweight fibers, so .NET must suck at handling them.
|
|
|
|
|
No, the issue is most fibers resolve to examination of a single character in the input so if you have 10 of them the same character gets examined as much as 10 times.
This is a byproduct of the design of a Pike VM, itself an artifact of the way NFA expressions work, so there's very little to be done about it except convert to a DFA (the optimization process).
Reduce the fibers and it speeds right up:
NFA ran with 10 max fibers and 3.5 average char passes
NFA+DFA (optimized) ran with 6 max fibers and 2.5 average char passes
DFA ran with 2.5 max fibers and 1 average char passes
Pass #1
NFA: Lexed in 1.575287 msec
NFA+DFA (optimized): Lexed in 1.054843 msec
DFA: Lexed in 0.901254 msec
Pass #2
NFA: Lexed in 1.529819 msec
NFA+DFA (optimized): Lexed in 1.100836 msec
DFA: Lexed in 0.830835 msec
Pass #3
NFA: Lexed in 1.523334 msec
NFA+DFA (optimized): Lexed in 1.049213 msec
DFA: Lexed in 0.851737 msec
Pass #4
NFA: Lexed in 1.400265 msec
NFA+DFA (optimized): Lexed in 1.03485 msec
DFA: Lexed in 0.829009 msec
Real programmers use butterflies
|
|
|
|
|
If I have understood it correctly: yield uses fibers, foreach uses yield.
So try to swap a few well chosen foreach loops for classic for loops and see what happens.
|
|
|
|
|
I'm not using any foreach loops. I've already optimized the VM itself to within an inch of its life
Real programmers use butterflies
|
|
|
|
|
|
I thought the goal was to speed this up?
Real programmers use butterflies
|
|
|
|
|
I didn't tell you to use it, I'm just looking for problems.
|
|
|
|
|
Look away. Here's almost all of it. The stuff you don't see is very thin
/// <summary>
/// Executes a compiled lexer program against <paramref name="input"/> using a
/// Pike-style virtual machine: every live fiber examines the code point under
/// the cursor, and the fibers that accept it are carried to the next generation.
/// </summary>
/// <param name="prog">The program; each element is one instruction whose first entry is the opcode.</param>
/// <param name="input">The input cursor. On a match, the matched text is appended to its capture buffer.</param>
/// <returns>The operand of the last <c>Match</c> instruction reached, or -1 if nothing matched.</returns>
/// <exception cref="ExpectingException">A high surrogate is the last unit of the input.</exception>
public static int Run(int[][] prog,LexContext input)
{
    input.EnsureStarted();
    int i, match = -1;
    _Fiber[] currentFibers, nextFibers, tmp;
    int currentFiberCount = 0, nextFiberCount = 0;
    int[] pc;
    // sp is the UTF-16 offset (into sb) of the code point under the cursor,
    // so it always equals sb.Length at the top of the main loop.
    int sp = 0;
    var sb = new StringBuilder(64);
    int[] saved, matched;
    saved = new int[2];
    currentFibers = new _Fiber[prog.Length];
    nextFibers = new _Fiber[prog.Length];
    _EnqueueFiber(ref currentFiberCount, ref currentFibers, new _Fiber(prog, 0, saved), 0);
    matched = null;
    var cur = _FetchCodepoint(input);
    while (0 < currentFiberCount)
    {
        bool passed = false;
        // A supplementary-plane code point occupies two UTF-16 units in sb, so
        // the offset just past it is sp + 2 rather than sp + 1.
        var nextSp = sp + (0xFFFF < cur ? 2 : 1);
        for (i = 0; i < currentFiberCount; ++i)
        {
            var t = currentFibers[i];
            pc = t.Program[t.Index];
            saved = t.Saved;
            int idx;
            switch (pc[0])
            {
                case Compiler.Switch:
                    idx = 1;
                    // Consuming cases: a range list, a -1 terminator, then a jump target.
                    while (idx < pc.Length && -2 < pc[idx])
                    {
                        var hit = _InRanges(pc, ref idx, cur);
                        // skip to the terminator, then step onto the jump target
                        while (-1 != pc[idx])
                        {
                            ++idx;
                        }
                        ++idx;
                        if (hit)
                        {
                            passed = true;
                            _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), nextSp);
                            // first matching case wins; also suppresses the default block below
                            idx = pc.Length;
                            break;
                        }
                        ++idx;
                    }
                    // Optional default block introduced by -2: non-consuming jumps,
                    // so they join the current generation at the current offset.
                    if (idx < pc.Length && -2 == pc[idx])
                    {
                        ++idx;
                        while (idx < pc.Length)
                        {
                            _EnqueueFiber(ref currentFiberCount, ref currentFibers, new _Fiber(t, pc[idx], saved), sp);
                            ++idx;
                        }
                    }
                    break;
                case Compiler.Char:
                    if (cur != pc[1])
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.Set:
                    idx = 1;
                    if (!_InRanges(pc, ref idx, cur))
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.NSet:
                    idx = 1;
                    if (_InRanges(pc, ref idx, cur))
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.UCode:
                    // At end of input cur is -1, which ConvertFromUtf32 rejects
                    // with an exception; the fiber simply dies instead.
                    if (0 > cur)
                    {
                        break;
                    }
                    if ((int)char.GetUnicodeCategory(char.ConvertFromUtf32(cur), 0) != pc[1])
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.NUCode:
                    // Same end-of-input guard as UCode.
                    if (0 > cur)
                    {
                        break;
                    }
                    if ((int)char.GetUnicodeCategory(char.ConvertFromUtf32(cur), 0) == pc[1])
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.Any:
                    if (LexContext.EndOfInput == input.Current)
                    {
                        break;
                    }
                    passed = true;
                    _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, t.Index + 1, saved), nextSp);
                    break;
                case Compiler.Match:
                    matched = saved;
                    match = pc[1];
                    // stop scanning this generation: the remaining fibers are abandoned
                    i = currentFiberCount;
                    break;
            }
        }
        if (passed)
        {
            sb.Append(char.ConvertFromUtf32(cur));
            input.Advance();
            cur = _FetchCodepoint(input);
            sp = nextSp;
        }
        tmp = currentFibers;
        currentFibers = nextFibers;
        nextFibers = tmp;
        currentFiberCount = nextFiberCount;
        nextFiberCount = 0;
    }
    if (null != matched)
    {
        var start = matched[0];
        var len = matched[1];
        input.CaptureBuffer.Append(sb.ToString(start, len - start));
        return match;
    }
    return -1;
}
/// <summary>
/// Reads the code point under the cursor, consuming the extra UTF-16 unit when
/// the cursor sits on a high surrogate. The cursor is left on the last unit of
/// the code point.
/// </summary>
/// <returns>The code point, or -1 when the cursor is at end of input.</returns>
static int _FetchCodepoint(LexContext input)
{
    if (LexContext.EndOfInput == input.Current)
    {
        return -1;
    }
    var ch1 = unchecked((char)input.Current);
    if (!char.IsHighSurrogate(ch1))
    {
        return ch1;
    }
    // Advance exactly once onto the low surrogate.
    if (-1 == input.Advance())
    {
        throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
    }
    var ch2 = unchecked((char)input.Current);
    return char.ConvertToUtf32(ch1, ch2);
}
/// <summary>
/// Appends fiber <paramref name="t"/> to the list <paramref name="l"/> and then
/// recursively follows the instruction it points at when that instruction is a
/// <c>Jmp</c> (one new fiber per target) or a <c>Save</c> (one new fiber with
/// <paramref name="sp"/> recorded in the addressed slot).
/// </summary>
/// <param name="lcount">Number of used entries in <paramref name="l"/>; incremented for every fiber added.</param>
/// <param name="l">The fiber list; replaced by a larger array when full.</param>
/// <param name="t">The fiber to add.</param>
/// <param name="sp">The input offset stored by <c>Save</c> instructions.</param>
static void _EnqueueFiber(ref int lcount,ref _Fiber[] l, _Fiber t, int sp)
{
    if (l.Length <= lcount)
    {
        // Doubling an empty array would leave it empty, so start from a small floor.
        Array.Resize(ref l, 0 == l.Length ? 4 : l.Length * 2);
    }
    l[lcount] = t;
    ++lcount;
    var pc = t.Program[t.Index];
    switch (pc[0])
    {
        case Compiler.Jmp:
            // every operand is a jump target; each one becomes its own fiber
            for (var j = 1; j < pc.Length; j++)
            {
                _EnqueueFiber(ref lcount, ref l, new _Fiber(t.Program, pc[j], t.Saved), sp);
            }
            break;
        case Compiler.Save:
            var slot = pc[1];
            // The copy must be able to hold index `slot`, i.e. have at least
            // slot + 1 entries; sizing it to `slot` overruns on the write below.
            var size = slot < t.Saved.Length ? t.Saved.Length : slot + 1;
            // Copy rather than mutate: other fibers may share t.Saved.
            var saved = new int[size];
            Array.Copy(t.Saved, saved, t.Saved.Length);
            saved[slot] = sp;
            _EnqueueFiber(ref lcount, ref l, new _Fiber(t, t.Index + 1, saved), sp);
            break;
    }
}
/// <summary>
/// One thread of execution in the VM: a program, the index of the instruction
/// it is positioned at, and the array of saved offsets it carries.
/// </summary>
private struct _Fiber
{
    /// <summary>The program this fiber runs.</summary>
    public readonly int[][] Program;
    /// <summary>Index into <see cref="Program"/> of the fiber's instruction.</summary>
    public readonly int Index;
    /// <summary>Offsets recorded so far by save instructions.</summary>
    public int[] Saved;

    /// <summary>Creates a fiber over <paramref name="program"/>.</summary>
    public _Fiber(int[][] program, int index,int[] saved)
    {
        Saved = saved;
        Index = index;
        Program = program;
    }

    /// <summary>
    /// Creates a fiber that runs the same program as <paramref name="fiber"/>
    /// but at a different instruction and with its own saved offsets.
    /// </summary>
    public _Fiber(_Fiber fiber, int index,int[] saved)
        : this(fiber.Program, index, saved)
    {
    }
}
Real programmers use butterflies
|
|
|
|
|
Oh, no wonder then, you're doing it on purpose...
|
|
|
|
|
Doing what on purpose? I'm a little slow this morning.
Real programmers use butterflies
|
|
|
|
|
Spawning loads of fibers.
Or is this auto generated code again?
|
|
|
|
|
Well, it's not on purpose per se. I mean yes, I'm spawning a lot of them, but the idea is to keep as few active or "alive" at one time as possible.
When I see a jmp with 3 operands, it spawns 2 fibers in addition to the primary fiber.
That's what I don't want, since every fiber has to examine the character under the cursor which leads to many examinations of the same character. There's no way to optimize this out because it's rather the point of the fiber running in the first place. Multiple examinations are a byproduct of the NFA algorithm.
My goal is simply to reduce/eliminate the amount of jmps and especially the number of operands they have.
A pure DFA can run by examining each character only once.
Real programmers use butterflies
|
|
|
|
|
|
Yes, this is the Lounge.
Real programmers use butterflies
|
|
|
|
|
|
|
It makes my head spin because it explains every line of code I ever wrote so clearly, and makes it unnecessary to write code in the future
«One day it will have to be officially admitted that what we have christened reality is an even greater illusion than the world of dreams.» Salvador Dali
|
|
|
|
|
Plus: it doesn't need any icon anymore.
"Five fruits and vegetables a day? What a joke!
Personally, after the third watermelon, I'm full."
|
|
|
|
|
Every word of that article made total sense!
However, combined into sentences they became completely meaningless to me ...
"I have no idea what I did, but I'm taking full credit for it." - ThisOldTony
AntiTwitter: @DalekDave is now a follower!
|
|
|
|
|
Stylianos Polychroniadis wrote: The Rotating Lepton Model vs the Standard Model It's not a confrontational system; it's just a new set of statistical analyses, based on new-ish assumptions.
The fact that even the Hamiltonians are calculated to within 1% of empirical observations is impressive enough for a lot of people to take notice and try it out for themselves, using variants of the system it's been tried with.
If results are good, it will be accepted gratefully -- there's a lot of other research that will profit from it -- so try to avoid saying anything that might be taken as being antagonistic toward others who have spent chunks of their lives working on different models.
BTW, you did notice that this is a programming message board, didn't you? Just because computer types stick the word "quantum" in front of everything, these days, doesn't mean they're into the Physics (just like the prevalence of the word "cloud" doesn't mean we know anything about meteorology, and "artificial intelligence" doesn't mean we've got any brains).
I wanna be a eunuchs developer! Pass me a bread knife!
|
|
|
|
|
Mark_Wallace wrote: "artificial intelligence" doesn't mean we've got any brains).
There speaks a man who has visited QA!
"I have no idea what I did, but I'm taking full credit for it." - ThisOldTony
AntiTwitter: @DalekDave is now a follower!
|
|
|
|
|
OriginalGriff wrote: There speaks a man who has visited QA! And came the closest he's ever come to breaking the four-minute mile, immediately afterward.
I wanna be a eunuchs developer! Pass me a bread knife!
|
|
|
|
|
Mark_Wallace wrote: BTW, you did notice that this is a programming message board, didn't you? Oh, you mean a bunch of geeks that even when it's safe out still avoid socializing in real life talking about flu/corona etc viruses spread by human to human contact??
The intelligence may not be artificial, but the [keyboard warrior] bravado often is.
after many otherwise intelligent sounding suggestions that achieved nothing the nice folks at Technet said the only solution was to low level format my hard disk then reinstall my signature. Sadly, this still didn't fix the issue!
|
|
|
|
|
Well, it's thermodynamic systems modelling and satellite trajectories I've been programming for the last 20 years so I guess to me it figures.
A little piece of my mind, if anyone cares for my two cents. If we only criticize based on speculation and not in-depth knowledge, i.e. failing to provide constructive grounds for conversation, then we fall into a dark infinite loop of self-admiration. Everybody has an opinion nowadays, I'm afraid. The science and engineering discipline we have chosen, since we are here on CP, should have taught us by iterations of education or training that progress and achievement are a lifetime process. I'd rather be more humble when dealing with things I don't quite understand yet and then be even more humble when I have mastered them and I have to express my opinion in public.
modified 2-Feb-20 14:55pm.
|
|
|
|
|
I don't see anyone criticising the article (which is normally my remit, so think yourself lucky that it's something I'm into), so I'm unsure why you went off on that tangent.
I wanna be a eunuchs developer! Pass me a bread knife!
|
|
|
|
|