Google Chrome - JavaScript indexOf with millions of matches
I'm trying to extract a few lines representing XML elements from a file. The user provides the file using a simple <input type="file"> tag, the file is read as text with a FileReader, and the text is then given as a parameter to this function:
// Begin/end marker pairs for the elements that should be kept.
var relevantdelimiters = [
  {"begin": "<header>", "end": "</header>"},
  {"begin": " <someelement>", "end": "</someelement>"}
];

/**
 * Extracts every element delimited by one of the `relevantdelimiters` pairs
 * and returns them, one per line, grouped by delimiter pair.
 *
 * NOTE: this is the remove-and-rescan approach. Every iteration rescans the
 * text from index 0 and builds a new copy of the remaining text, so the total
 * work grows quadratically with the number of matches.
 *
 * @param {string} invalidxml - Raw text containing the elements to extract.
 * @returns {string} The extracted elements, each followed by "\n".
 */
function dealwithstring(invalidxml) {
  var validxml = "";
  // Work on a local copy instead of reassigning the parameter.
  var remaining = invalidxml;

  for (var i = 0; i < relevantdelimiters.length; i++) {
    var delimiter = relevantdelimiters[i];
    var startpos;

    // While there are relevant elements of this kind left:
    while ((startpos = remaining.indexOf(delimiter.begin)) !== -1) {
      // Look for the end marker only after the begin marker, so a stray
      // closing tag earlier in the text cannot be picked up by mistake.
      var endpos = remaining.indexOf(
        delimiter.end,
        startpos + delimiter.begin.length
      );
      if (endpos === -1) {
        // Unterminated element: nothing more can be extracted for this pair,
        // and continuing would loop forever on the same begin marker.
        break;
      }
      var stoppos = endpos + delimiter.end.length;

      // Append to the end result:
      validxml += remaining.substring(startpos, stoppos) + "\n";

      // Take the item out of the input to process the next item.
      remaining = remaining.substring(0, startpos) + remaining.substring(stoppos);
    }
  }

  // Return the fixed data.
  return validxml;
}
This approach seems to work fine for a small number of matches in the input text file, but given a file of 1.5 MB, the script gets stuck (running in Google Chrome, it makes the tab non-responsive). The file contains a million "relevant elements", meaning matches for the relevantdelimiters.
How can I optimize this?
Instead of repeatedly "taking the item out of the input" by calling replace on it, you should use the second argument of indexOf: fromIndex. That way, it'll search for the next occurrence after the given index, and you can loop through the large input without needing to touch it.
/**
 * Extracts every element delimited by a begin/end marker pair and returns
 * them, one per line, grouped by delimiter pair.
 *
 * The input is never modified or copied: a cursor (`pos`) is advanced past
 * each match and passed to `indexOf` as its start position, so each delimiter
 * pair is handled in a single forward pass over the text.
 *
 * @param {string} invalidxml - Raw text containing the elements to extract.
 * @param {Array<{begin: string, end: string}>} [delimiters] - Marker pairs to
 *   extract; defaults to the file-level `relevantdelimiters` table.
 * @returns {string} The extracted elements, each followed by "\n".
 */
function dealwithstring(invalidxml, delimiters) {
  var activeDelimiters = delimiters === undefined ? relevantdelimiters : delimiters;
  var validxml = "";

  for (var i = 0; i < activeDelimiters.length; i++) {
    var delimiter = activeDelimiters[i];
    var pos = 0;
    var startpos;

    // While there are relevant elements of this kind left:
    while ((startpos = invalidxml.indexOf(delimiter.begin, pos)) !== -1) {
      // Search for the end marker after the begin marker itself.
      var endpos = invalidxml.indexOf(
        delimiter.end,
        startpos + delimiter.begin.length
      );
      if (endpos === -1) {
        // Unterminated element: stop here, otherwise `pos` would move
        // backwards and the same begin marker would be found forever.
        break;
      }
      pos = endpos + delimiter.end.length;

      // Append to the end result:
      validxml += invalidxml.slice(startpos, pos) + "\n";
    }
  }

  return validxml;
}
Comments
Post a Comment