From 975f4e201e55b0a216a61ddae0d01e6e6bd6372c Mon Sep 17 00:00:00 2001 From: mysticbbs Date: Mon, 1 Apr 2013 03:29:10 -0400 Subject: [PATCH] Massive speed improvements during import --- mystic/mutil.ini | 8 +- mystic/mutil_echocore.pas | 147 +++++++++++++++++++++++------------- mystic/mutil_echoimport.pas | 42 ++++++----- 3 files changed, 126 insertions(+), 71 deletions(-) diff --git a/mystic/mutil.ini b/mystic/mutil.ini index 5493f18..1975d1c 100644 --- a/mystic/mutil.ini +++ b/mystic/mutil.ini @@ -407,10 +407,16 @@ ; If you want to save duplicated / bad messages to a certain message ; base, then set this value to the *INDEX* of the message base (viewable ; at the top of the msgbase editor). Otherwise, set this value to -1 - ; or leave it commented out and they will be ignored. + ; or leave it commented out and they will be ignored. ; dupe_msg_index = 5 + ; This defines the number of messages to keep in the duplicate database. + ; Each message takes 8 bytes of data, so for example 32,000 messages takes + ; 256kb of memory while importing messages. Max is 250,000 messages. 
+
+ dupe_db_size = 32000
+
  ; If you want MUTIL to auto create message bases when it finds an
  ; echomail message for a non-existing base, set this value to true
  ; and use the options below to define the default values
diff --git a/mystic/mutil_echocore.pas b/mystic/mutil_echocore.pas
index 9621bbf..71c9b27 100644
--- a/mystic/mutil_echocore.pas
+++ b/mystic/mutil_echocore.pas
@@ -14,7 +14,30 @@ Uses
   mUtil_Common;
 
 Const
-  MaxDupeChecks = 40000;
+  MaxDupeSize = 250000;
+
+Type
+  RecMsgDupe = Record
+    Header : Cardinal;
+    Text   : Cardinal;
+  End;
+
+  RecDupePTR   = ^RecDupeArray;
+  RecDupeArray = Array[1..MaxDupeSize] of RecMsgDupe;
+
+Type
+  TPKTDupe = Class
+    DupeData   : RecDupePTR;
+    MaxDupes   : Cardinal;
+    CurDupes   : Cardinal;
+    TotalDupes : Cardinal;
+
+    Constructor Create (Max: Cardinal);
+    Destructor  Destroy; Override;
+
+    Function  IsDuplicate  (Var D: RecMsgDupe) : Boolean;
+    Procedure AddDuplicate (Var D: RecMsgDupe);
+  End;
 
 Const
   pktPrivate    = $0001;
@@ -66,11 +89,6 @@ Type
     Filler   : Array[1..20] of Char;
   End;
 
-  RecMsgDupe = Record
-    Header : Cardinal;
-    Text   : Cardinal;
-  End;
-
   RecMsgLine = String[79];
 
   TPKTReader = Class
@@ -79,7 +97,6 @@ Type
     Dest     : RecEchoMailAddr;
     MsgHdr   : RecPKTMessageHdr;
     MsgFile  : PCharFile;
-    DupeFile : PCharFile;
     MsgTo    : String[50];
     MsgFrom  : String[50];
     MsgSubj  : String[80];
@@ -98,18 +115,107 @@ Type
 
     Function    Open (FN: String) : Boolean;
     Function    GetMessage (NetMail: Boolean) : Boolean;
-    Function    IsDuplicate : Boolean;
-    Procedure   AddDuplicate;
   End;
 
 Implementation
 
+// Loads the duplicate table from echodupes.dat, holding at most Max
+// records (clamped to 1..MaxDupeSize) in memory
+Constructor TPKTDupe.Create (Max: Cardinal);
+Var
+  F    : File;
+  Skip : Int64;
+Begin
+  Inherited Create;
+
+  // A zero-sized table would make AddDuplicate write past the allocation
+  If Max = 0           Then Max := 1;
+  If Max > MaxDupeSize Then Max := MaxDupeSize;
+
+  MaxDupes   := Max;
+  TotalDupes := 0;
+
+  GetMem (DupeData, MaxDupes * SizeOf(RecMsgDupe));
+
+  Assign (F, bbsConfig.DataPath + 'echodupes.dat');
+  {$I-} Reset (F, 1); {$I+}
+
+  // A missing file simply means an empty table; Destroy will create it
+  If IoResult = 0 Then Begin
+    // Records are stored oldest first, so when the file holds more than
+    // the table can, skip ahead and keep only the newest ones
+    Skip := (FileSize(F) DIV SizeOf(RecMsgDupe)) - MaxDupes;
+
+    If Skip > 0 Then Seek (F, Skip * SizeOf(RecMsgDupe));
+
+    BlockRead (F, DupeData^, MaxDupes * SizeOf(RecMsgDupe), TotalDupes);
+    Close     (F);
+
+    TotalDupes := TotalDupes DIV SizeOf(RecMsgDupe);
+  End;
+
+  // Resume after the newest record so the next wrap hits the oldest one
+  CurDupes := TotalDupes;
+End;
+
+// Returns True when D matches a record already in the table
+Function TPKTDupe.IsDuplicate (Var D: RecMsgDupe) : Boolean;
+Var
+  Count : Cardinal;
+Begin
+  Result := False;
+
+  For Count := 1 to TotalDupes Do
+    If (D.Header = DupeData^[Count].Header) and (D.Text = DupeData^[Count].Text) Then Begin
+      Result := True;
+
+      Exit;
+    End;
+End;
+
+// Stores D in the next slot, wrapping around to overwrite the oldest
+// record once MaxDupes entries are held
+Procedure TPKTDupe.AddDuplicate (Var D: RecMsgDupe);
+Begin
+  If CurDupes = MaxDupes Then Begin
+    TotalDupes := MaxDupes;
+    CurDupes   := 0;
+  End;
+
+  Inc (CurDupes);
+
+  If TotalDupes < CurDupes Then
+    TotalDupes := CurDupes;
+
+  DupeData^[CurDupes] := D;
+End;
+
+// Writes the table back to echodupes.dat and releases its memory
+Destructor TPKTDupe.Destroy;
+Var
+  F : File;
+Begin
+  Assign  (F, bbsConfig.DataPath + 'echodupes.dat');
+  ReWrite (F, 1);
+
+  // After a wrap the slots past CurDupes hold the oldest records; write
+  // them first so the file stays in oldest-to-newest order for Create
+  If CurDupes < TotalDupes Then
+    BlockWrite (F, DupeData^[CurDupes + 1], (TotalDupes - CurDupes) * SizeOf(RecMsgDupe));
+
+  BlockWrite (F, DupeData^, CurDupes * SizeOf(RecMsgDupe));
+  Close      (F);
+
+  FreeMem (DupeData, MaxDupes * SizeOf(RecMsgDupe));
+
+  Inherited Destroy;
+End;
+
 Constructor TPKTReader.Create;
 Begin
   Opened   := False;
   MsgLines := 0;
-  MsgFile  := New (PCharFile, Init(1024 * 4));
-  DupeFile := New (PCharFile, Init(1024 * 8));
+  MsgFile  := New (PCharFile, Init(1024 * 16));
 End;
 
 Destructor TPKTReader.Destroy;
@@ -117,12 +223,8 @@ Begin
   DisposeText;
 
   If MsgFile.Opened Then MsgFile.Close;
-  If DupeFile.Opened Then DupeFile.Close;
 
   Dispose (MsgFile, Done);
-  Dispose (DupeFile, Done);
-
-  // TRIM DUPLICATE FILE HERE
 
   Inherited Destroy;
 End;
@@ -286,40 +388,4 @@ Begin
   Until False;
 End;
 
-Procedure TPKTReader.AddDuplicate;
-Var
-  F: File;
-Begin
-  Assign (F, bbsConfig.DataPath + 'echodupes.dat');
-
-  If Not ioReset (F, 1, fmRWDN) Then
-    ioReWrite (F, 1, fmRWDN);
-
-  Seek       (F, FileSize(F));
-  BlockWrite (F, MsgCRC, SizeOf(RecMsgDupe));
-  Close      (F);
-End;
-
-Function TPKTReader.IsDuplicate : Boolean;
-Var
-  Dupe : RecMsgDupe;
-  Res  : LongInt;
-Begin
-  Result := False;
-
-  If Not DupeFile.Open (bbsConfig.DataPath + 'echodupes.dat') Then Exit;
-
-  While Not DupeFile.EOF Do Begin
-    DupeFile.BlockRead (Dupe, SizeOf(RecMsgDupe), Res);
-
-    If
(Dupe.Text = MsgCRC.Text) and (Dupe.Header = MsgCRC.Header) Then Begin - Result := True; - - Break; - End; - End; - - DupeFile.Close; -End; - End. diff --git a/mystic/mutil_echoimport.pas b/mystic/mutil_echoimport.pas index f114183..4496c38 100644 --- a/mystic/mutil_echoimport.pas +++ b/mystic/mutil_echoimport.pas @@ -69,7 +69,9 @@ Var TotalDupes : LongInt; EchoNode : RecEchoMailNode; DupeIndex : LongInt; + DupeMBase : RecMessageBase; CreateBases : Boolean; + Dupes : TPKTDupe; Procedure ImportNetMailpacket (ArcFN: String); Var @@ -152,28 +154,23 @@ Var End; While PKT.GetMessage(False) Do Begin - If PKT.IsDuplicate Then Begin + If Dupes.IsDuplicate(PKT.MsgCRC) Then Begin Log (3, '!', ' Duplicate message found in ' + PKT.MsgArea); If DupeIndex <> -1 Then Begin - CurTag := ''; // force next real msg to get mbase record + If (MsgBase <> NIL) and (CurTag <> '-DUPEMSG-') Then Begin + MsgBase^.CloseMsgBase; - // TODO for speed: - // load dupe base first before all processsing - // add a way to not close/reopen if last was dupe (simple boolean) + Dispose (MsgBase, Done); - If GetMBaseByIndex (DupeIndex, MBase) Then Begin - If MsgBase <> NIL Then Begin - MsgBase^.CloseMsgBase; - - Dispose (MsgBase, Done); - - MsgBase := NIL; - End; - - MessageBaseOpen (MsgBase, MBase); - SavePKTMsgToBase (MsgBase, PKT, False); + MsgBase := NIL; + CurTag := '-DUPEMSG-'; End; + + If MsgBase = NIL Then + MessageBaseOpen (MsgBase, DupeMBase); + + SavePKTMsgToBase (MsgBase, PKT, False); End; Inc (TotalDupes); @@ -253,7 +250,7 @@ Var SavePKTMsgToBase (MsgBase, PKT, False); - PKT.AddDuplicate; + Dupes.AddDuplicate(PKT.MsgCRC); Inc (TotalEcho); @@ -272,8 +269,6 @@ Var PKT.MsgFile.Close; End; -// PKT.MsgFile.Close; - FileErase (TempPath + DirInfo.Name); End; @@ -324,6 +319,13 @@ Begin CreateBases := INI.ReadBoolean(Header_ECHOIMPORT, 'auto_create', False); DupeIndex := INI.ReadInteger(Header_ECHOIMPORT, 'dupe_msg_index', -1); + Count := INI.ReadInteger(Header_ECHOIMPORT, 'dupe_db_size', 32000); + + 
Dupes := TPKTDupe.Create(Count); + + If DupeIndex <> -1 Then + If Not GetMBaseByIndex (DupeIndex, DupeMBase) Then + DupeIndex := -1; FindFirst (bbsConfig.InboundPath + '*', AnyFile, DirInfo); @@ -367,6 +369,8 @@ Begin FindClose (DirInfo); + Dupes.Free; + ProcessStatus ('Total |15' + strI2S(TotalEcho) + ' |07echo |15' + strI2S(TotalNet) + ' |07net |15' + strI2S(TotalDupes) + ' |07dupe', True); ProcessResult (rDONE, True); End;