You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

FilterChain.cpp 72KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309
  1. // FilterChain.cpp
  2. //
  3. // (C) 2002-2009 MicroNeil Research Corporation
  4. //
  5. // Main code file for module FilterChain.
  6. // 20041116 _M Added UrlDecode module. The module will repeat a decoded version of
  7. // any anchor tag that it sees which contains decodable %xx bytes. Other anchor
  8. // tags are not repeated.
  9. // 20041116 _M Upgrades to the Defunker module. The module now decodes any HTML
  10. // encoded bytes that could have been normal ascii.
  11. // 20041114 _M Completed basic defunker engine which strips out all HTML and some
  12. // basic encoding.
  13. // 20041113 _M Began heavy upgrades to this module to improve performance and
  14. // provide additional obfuscation removal. This modification will include a move
  15. // from the use of switch(State) mechanisms to the use of function pointers. This
  16. // should save a few cycles on every byte processed.
  17. #include "FilterChain.hpp"
  18. // FilterChainBase64 Methods.
  19. // GetByte()
  20. // Returns the next byte from this filter module.
  21. unsigned char FilterChainBase64::GetByte() {
  22. switch(State) { // What are we doing?
  23. case SCANNING:{ // We're scanning to turn on...
  24. // In this mode we are hunting for a reason to turn
  25. // ourselves on. If we find our startup sequence then
  26. // we will go into decoding mode. Until then, we try
  27. // to match each incoming character with our startup
  28. // sequence.
  29. while(true) { // Search for our startup string or get out.
  30. try { // Try this...
  31. x=FilterChain::GetByte(); // Get the next byte from source.
  32. } // If we get the empty signal
  33. // here, we've failed to match.
  34. catch(Empty) { // If so - and we haven't
  35. if(0==ScanIx) throw Empty("FilterChainBase64: No more data"); // started then just throw Empty.
  36. x=Base64Start[ScanIx]-1; // If we did start then make
  37. } // sure we won't match below.
  38. // It's important that no empty's get beyond this point unless
  39. // we've got a match started. Otherwise we'll return corruption.
  40. if(x!=Base64Start[ScanIx]){ // If the byte doesnt match,
  41. // and we've started matching
  42. if(0!=ScanIx) { // the sequence then save the
  43. Buffer=x; // byte for later, change to
  44. State=DEQUEING;DequeIx=0; // DEQUING mode, and return
  45. return GetByte(); // the first Dequeued byte.
  46. }
  47. // If there's no match
  48. else return x; // started then shortcut that:
  49. } // just send back the byte.
  50. // We've handled non matches, now time for the good stuff...
  51. else { // This byte matches :-)
  52. ScanIx++; // Move forward!
  53. if(ScanIx>=sizeof(Base64Start)-1){ // If we've matched it all
  54. // then prep for decoding.
  55. // At this point we've got our trigger - but we need to
  56. // eat up any extra junk before we start decoding. What
  57. // we're looking for is a blank line (CRLFCRLF) within
  58. // the next couple of lines. While we're at this if we
  59. // get an exception we'll just pass it through.
  60. ScanIx=DequeIx=0; // Let's reset our indexes.
  61. // We're SCANNING now - so if we fail to get to good base64
  62. // stuff then we'll be starting from scratch - and that's ok.
  63. // Here we will allow some number of additional header lines
  64. // to occur before we give up on this being a base64 segment.
  65. // If we give up then we go back to scanning agian.
  66. // 20030114 _M Increased limit to 150 - lots of X- headers cause
  67. // the engine to stop decoding base64!! 30 was too small.
  68. const int LineLimit = 150; // We'll allow this many.
  69. for(int LineCount=0; LineCount<LineLimit; LineCount++) {
  70. do{ // Eat up characters through
  71. x=FilterChain::GetByte(); // the end of the line.
  72. } while(x!='\n');
  73. x=FilterChain::GetByte(); // Get the next byte.
  74. if(x=='\n'){ // Next line is blank?
  75. State=DECODING; // Then get ready to DECODE!
  76. break; // NO MORE LOOPING!
  77. }
  78. // If the line is not blank then we'll go around again up
  79. // to the number of lines we allow. Then we're done trying
  80. // and we will fall through.
  81. }
  82. // At this point we are either ready to decode base64 data
  83. // or we're still in SCANNING mode because of too much junk.
  84. if(DECODING==State) { // If we're ready to decode
  85. Workspace = 0x0000000a; // then set up a pair of
  86. DequeIx=3; // <LF> lines so they will
  87. ScanIx=2; // be the first bytes decoded.
  88. } // Here we pump <LF> into the
  89. // workspace. Then we return one <LF>
  90. return x; // (usually).
  91. // The deal is, if we're decoding then we will pump in LF and
  92. // return what must be the last LF. If we're not decoding then we
  93. // end up returning the last byte we read before giving up which should
  94. // be the first byte of the next line.
  95. }
  96. }
  97. }
  98. // The above will be tried repeatedly in the first call to
  99. // this object's GetByte() until we either return a byte or
  100. // throw an exception. The result is that once we start to match
  101. // our startup sequence we will either match all of it or we will
  102. // grab as much of it as we can until we don't match - then we'll
  103. // fail and move into DEQUEING.
  104. // You may be asking yourself, why go through all that complex
  105. // Turing engine stuff when a simple line load and string comparison
  106. // would do nicely. The answer is SPEED. Without getting too deep,
  107. // the above code will identify the startup string in roughly 2
  108. // comparisons per byte. If I were to load the entire line first
  109. // then that alone would be 2 comparisons before I got started. This
  110. // way I cut the number of comparisons down by at least 50%.
  111. break;
  112. }
  113. case DEQUEING:{ // We're recovering from a false start...
  114. // When we get here, ScanIx will be one greater than the last
  115. // matching byte. The last byte read will be stored in our buffer
  116. // so that it can be returned here as the last step. The calling
  117. // program will request each byte one at a time... starting with
  118. // the first byte coming out of this code. For all positions in our
  119. // startup string less than ScanIx, we know we had a matching input.
  120. // We start our output at the first byte. The Scanning engine should
  121. // have set our DequeIx to 0 before we got here - so that part should
  122. // be automatic. Here goes...
  123. if(DequeIx < ScanIx) { // If we're still returning a
  124. unsigned char x = // partial match, grab the next byte
  125. Base64Start[DequeIx]; // from the startup string, Increment
  126. DequeIx++; // our Deque index for next time, and
  127. return x; // return the byte that's needed.
  128. } else { // When we're done with that part,
  129. State=SCANNING; // we set our mode back to scanning,
  130. ScanIx=DequeIx=0; // reset our indexes to start again,
  131. return Buffer; // and return the unmatching byte that
  132. } // got us to DEQUEING mode.
  133. break;
  134. }
  135. case DECODING:{ // We're decoding data...
  136. // DequeIx will be used here to indicate how many decoded
  137. // bytes are ready to be delivered. This is compatible with
  138. // the normal startup for other modes.
  139. // ScanIx will be used here to indicate which byte position
  140. // we should be reading from. This combination helps to handle
  141. // pads and simplifies processing. For example, if we've got two
  142. // pads then we'll have a single byte to read starting at index
  143. // zero.
  144. // If we get an exception from up the chain while we're decoding
  145. // then we'll just pass it along.
  146. if(0==DequeIx) { // If there are no bytes ready then get some!
  147. // First Byte:
  148. // Eat anything up to the first byte that doesn't look like
  149. // a base64 digit. If we hit a '\n-' then we'll assume we've got
  150. // a segment boundary and we'll quit. Everything else will be
  151. // ignored to get us to the next line.
  152. do{ // Empty out any in-between bytes.
  153. y=x;x=FilterChain::GetByte(); // Read one byte at a time.
  154. if('-'==x && '\n'==y) { // If we get to a segment separator
  155. ScanIx=DequeIx=0; // then reset our indexes, set our
  156. State=SCANNING; // state to SCANNING...
  157. do { // Eat up the rest of this line
  158. x=FilterChain::GetByte(); // one byte at a time including
  159. } while('\n'!=x); // the <LF> at the end, then
  160. return '\n'; // return the that <LF> byte.
  161. // On the next incoming call, the scanner section "should"
  162. // return the following <LF> byte to complete the end of line.
  163. // This ensures that we put a new line at the end of our
  164. // decoded segment. Four message scanning purposes this is
  165. // desireable. If we wanted a clean segment then we'd probably
  166. // eat through the new line rather than the carriage return.
  167. }
  168. } while(XX64==Base64Table[x]); // Eat all invalid bytes.
  169. // At this point x should have the first valid byte for us :-)
  170. if('='==x) { // First byte can't be a pad.
  171. ScanIx=DequeIx=0; // If it is then we reset ourself,
  172. do{ // eat the rest of this line,
  173. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  174. }while('\n'!=x);
  175. return x;
  176. }
  177. // At this point we have a clean byte, presumably at the start
  178. // of a base64 block which we can decode.
  179. x = Base64Table[x]; // Convert the byte.
  180. // This first one we assign to clear out the register. The rest
  181. // get added to keep things in place.
  182. Workspace = // Add it to the workspace in the
  183. x << base64_seg0_shift; // correct position.
  184. // Byte number 2 of the block...
  185. x=FilterChain::GetByte(); // Grab the byte...
  186. if('='==x) { // This byte can't be a pad.
  187. ScanIx=DequeIx=0; // If it is then we reset ourself,
  188. do{ // eat the rest of this line,
  189. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  190. }while('\n'!=x);
  191. return x;
  192. }
  193. x=Base64Table[x]; // Convert the byte.
  194. if(XX64==x) { // The byte can't be invalid...
  195. ScanIx=DequeIx=0; // If it is then we reset ourself,
  196. do{ // eat the rest of this line,
  197. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  198. }while('\n'!=x);
  199. return x;
  200. }
  201. // At this point we have a clean byte...
  202. Workspace += // Add it to the workspace in the
  203. x << base64_seg1_shift; // correct position.
  204. // Byte number 3 of the block...
  205. x=FilterChain::GetByte(); // Grab the byte...
  206. // This one and the next one can be pads. Here's where we start
  207. // deciding how many bytes we have. If we have a pad in this spot
  208. // then our output bytes will only be 1.
  209. if('='==x) DequeIx = 1; // If we've got a pad here we'll only
  210. else DequeIx = 3; // have one valid output byte. Otherwise
  211. // we could have 3.
  212. x=Base64Table[x]; // Convert the byte.
  213. if(XX64==x) { // The byte can't be invalid...
  214. ScanIx=DequeIx=0; // If it is then we reset ourself,
  215. do{ // eat the rest of this line,
  216. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  217. }while('\n'!=x);
  218. return x;
  219. }
  220. // At this point we have a clean byte...
  221. Workspace += // Add it to the workspace in the
  222. x << base64_seg2_shift; // correct position.
  223. // Byte number 4 of the block...
  224. x=FilterChain::GetByte(); // Grab the byte...
  225. if('='==x && DequeIx > 2) // If we've got a pad here the most
  226. DequeIx=2; // we can have are 2 valid outputs.
  227. x=Base64Table[x]; // Convert the byte.
  228. if(XX64==x) { // The byte can't be invalid...
  229. ScanIx=DequeIx=0; // If it is then we reset ourself,
  230. do{ // eat the rest of this line,
  231. y=x;x=FilterChain::GetByte(); // and then go on with scanning.
  232. }while('\n'!=x);
  233. return x;
  234. }
  235. // At this point we have a clean byte...
  236. Workspace += // Add it to the workspace in the
  237. x << base64_seg3_shift; // correct position.
  238. // At this point we are ready to begin outputting our bytes.
  239. ScanIx=2; // Output always starts byte three.
  240. return GetByte(); // Return our first decoded byte.
  241. } else { // If there are bytes ready then spit them out.
  242. x=(Workspace >> (ScanIx * 8)) & 0xFF; // Grab the byte we want.
  243. ScanIx--; // Decrement our output index.
  244. DequeIx--; // Decrement our output count.
  245. return x; // Send back our byte.
  246. }
  247. break;
  248. }
  249. }
  250. // We should never get to this point.
  251. return 0; // Dummy to make the compiler happy.
  252. }
  253. // FilterChainQuotedPrintable Methods.
  254. // isHexDigit()
  255. // Returns true if i is a valid hex digit.
  256. bool FilterChainQuotedPrintable::isHexDigit(unsigned char i) {
  257. if(
  258. (i >= '0' && i <= '9') || // Hex digits must be 0-9 or
  259. (i >= 'A' && i <= 'F') || // A-F or
  260. (i >= 'a' && i <= 'f') // a-f if somebody used lower case.
  261. ) {
  262. return true; // If i is one of these we are true
  263. } else {
  264. return false; // IF i is not then we are false
  265. }
  266. }
  267. // convertHexDigit()
  268. // Returns an integer value for the hex digit i
  269. int FilterChainQuotedPrintable::convertHexDigit(unsigned char i) {
  270. if(i >= '0' && i <= '9') { // Digit chars convert directly.
  271. return i - '0';
  272. } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15
  273. return i - 'A' + 10;
  274. } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15
  275. return i - 'a' + 10;
  276. }
  277. return -1; // Return -1 if i was not a hex digit!
  278. }
  279. // GetByte()
  280. // Returns the next byte from this filter module.
  281. unsigned char FilterChainQuotedPrintable::GetByte() {
  282. switch(State) { // What are we doing?
  283. case SCANNING: // We're scanning to turn on...
  284. Buffer[0]=FilterChain::GetByte();
  285. if('='== Buffer[0]) { // If we've found an = then we're on.
  286. Buffer[1]=FilterChain::GetByte(); // Fill up the decoding buffer with
  287. Buffer[2]=FilterChain::GetByte(); // the next two bytes,
  288. BufferIndex = 0; // Setup the buffer index.
  289. BufferLength = 3; // Setup the buffer length.
  290. State = DECODING; // Set our mode and get the result
  291. return GetByte(); // by calling ourselves!
  292. } else
  293. return Buffer[0]; // Otherwise just pass through.
  294. break;
  295. case DEQUEING: // We're recovering from a false start...
  296. if(BufferIndex < BufferLength) { // If we've got buffered stuff then
  297. return Buffer[BufferIndex++]; // return it and move the pointer.
  298. } else { // If we've run out of stuff then
  299. BufferIndex = 0; // Reset our index and our
  300. BufferLength = 0; // buffer length, then set our
  301. State = SCANNING; // mode to SCANNING and return
  302. return GetByte(); // the next byte from there.
  303. }
  304. break;
  305. case DECODING: // We're decoding data...
  306. // Now we are decoding quoted printable data. First we will handle the case
  307. // where this is a soft line break. In that case we simply eat the encoded bytes
  308. // and set up to dequeue the last byte.
  309. if(Buffer[1] == '\n') { // If this is a soft break the
  310. BufferIndex = 2; // point our dequeue index at the last byte
  311. State = DEQUEING; // establish our DEQUEING state and
  312. return GetByte(); // return by letteing DEQUEING do it!
  313. }
  314. // If it wasn't a soft break then we _may_ need to decode it. We will find
  315. // out by looking for hex digits in the next two locations. If they are there
  316. // we are decoding. If not then we will simply dequeue the entire buffer.
  317. if(
  318. isHexDigit(Buffer[1]) && // If the next two bytes are hex
  319. isHexDigit(Buffer[2]) // digits then we can convert them.
  320. ) {
  321. Workspace= // Set our workspace to convert the
  322. (convertHexDigit(Buffer[1]) << 4) | // two hex digits into a single
  323. (convertHexDigit(Buffer[2])); // byte.
  324. Buffer[2] = Workspace & 0xFF; // Store that byte in our buffer.
  325. BufferIndex = 2; // Set the index and change our
  326. State = DEQUEING; // state to DEQUEING then let that
  327. return GetByte(); // code spit it out!
  328. } else { // If either byte was not a valid
  329. State = DEQUEING; // hex digit DEQUEUE the entire
  330. return GetByte(); // buffer.
  331. }
  332. break;
  333. };
  334. return FilterChain::GetByte(); // Dummy
  335. }
  336. /////////////////////////////////////////////////////////////////////////////////////////
  337. // FilterChainDefunker
  338. /////////////////////////////////////////////////////////////////////////////////////////
  // Text placed in the queue by FilterChainDefunker::Preamble().
  const char* DefunkerPreamble = "\n----[DEFUNKER]----\n";

  // Tag patterns handed to MatchTagPattern().
  const char* patMatchBR = "<br>";
  const char* patMatchP  = "<p>";

  // Character entity patterns compared byte by byte by the Match...() states.
  const char* patNBSP = "&nbsp;";
  const char* patAMP  = "&amp;";
  const char* patAPOS = "&apos;";
  const char* patLT   = "&lt;";
  const char* patGT   = "&gt;";
  const char* patQUOT = "&quot;";
  349. // SkipHeaders() waits for the headers to go by before launching Store().
  350. unsigned char FilterChainDefunker::SkipHeaders() { // While waiting EOH...
  351. unsigned char x = FilterChain::GetByte(); // Get a byte.
  352. if(LastRawByte == '\n' && x == '\n') { // If we're at EOH
  353. Master = &FilterChainDefunker::Store; // Go to store mode.
  354. return x; // and return the byte.
  355. } // If we're not at EOH
  356. LastRawByte = x; // then remember this byte
  357. return x; // and return it.
  358. }
  359. // Store() puts the original data into the buffer for later.
  360. unsigned char FilterChainDefunker::Store() { // While in Store mode,
  361. unsigned char x; // we need a byte.
  362. try {
  363. if(DefunkerSize-10 < InputPosition) {
  364. cout << "watch this" << endl;
  365. }
  366. if(DefunkerSize <= InputPosition)
  367. throw Empty("FilterChainDefunker: No more data"); // Careful about the buffer.
  368. x = FilterChain::GetByte(); // Try getting the next byte
  369. StoreBuffer[InputPosition++] = x; // and storing it.
  370. }
  371. catch(Empty) { // When we get the Empty
  372. Master = &FilterChainDefunker::ReadOut; // signal it is time for us
  373. return GetByte(); // to read out our data.
  374. }
  375. return x; // Otherwis pass on the byte.
  376. }
  377. // ReadOut() retrieves the stored data through the state engine.
  378. unsigned char FilterChainDefunker::ReadOut() { // Read out and dedup spaces.
  379. if(LastReadOut == ' ') { // If the last byte was a space
  380. while(LastReadOut == ' ') { // then eat all of the spaces
  381. LastReadOut = SpaceConvChart[GetInternal()]; // that come next with spaces
  382. } // converted.
  383. } else { // If it was not a space then
  384. LastReadOut = SpaceConvChart[GetInternal()]; // simply read the next byte
  385. } // with spaces converted.
  386. return LastReadOut; // Output the byte we found.
  387. }
  388. // GetStore() retrieves the raw store for the state engine.
  389. unsigned char FilterChainDefunker::GetStore() { // Read from the Store.
  390. if(OutputPosition >= InputPosition) {
  391. throw Empty("FilterChainDefunker: No more data"); // If we're out of bytes throw Empty.
  392. }
  393. return LastGetStore = StoreBuffer[OutputPosition++]; // If we have more, trap and send it.
  394. }
  395. //// The following functions make up the state engine with the state maintained
  396. //// as a function pointer in the (*Internal)() handle.
  397. unsigned char FilterChainDefunker::Preamble() { // Emit the preamble.
  398. for(
  399. int p=0; // Load the preamble into
  400. DefunkerPreamble[p]; // the queue.
  401. p++) EnQueue(DefunkerPreamble[p]);
  402. Internal = &FilterChainDefunker::DeQueue; // Set up the DeQueue mode
  403. return GetInternal(); // and return the next byte.
  404. }
  405. unsigned char FilterChainDefunker::DefunkRoot() { // While in DefunkRoot state...
  406. unsigned char x = 0; // One byte at a time via x.
  407. do { // Loop through any emptiness.
  408. ReturnNothing = false; // Be ready to return a byte.
  409. x = GetStore(); // Grab the next byte to process.
  410. if(x == '<') { // If it matches < then
  411. Internal = &FilterChainDefunker::OpenTag; // go to OpenTag state and
  412. x = GetInternal(); // return the converted byte.
  413. } else
  414. if(x == '&') { // If it matches & then
  415. Internal = &FilterChainDefunker::OpenAmp; // go to OpenAnd state and
  416. EnQueue(x); // push in the amphersand.
  417. x = GetInternal(); // return the converted byte.
  418. }
  419. // If x is none of the above then x is just x.
  420. } while (true == ReturnNothing); // Returning nothing? Go again!
  421. return x; // otherwise return a funkless x.
  422. }
  423. unsigned char FilterChainDefunker::OpenTag() { // While in OpenTag state
  424. unsigned char x = GetStore(); // grab the next byte.
  425. switch(tolower(x)) { // Check the lower case of x.
  426. case 'b': // If we have a 'b' then
  427. Internal = &FilterChainDefunker::MatchBR; // our mode is MatchBR.
  428. break;
  429. case 'p': // If we have a 'p' then
  430. Internal = &FilterChainDefunker::MatchP; // our mode is MatchP.
  431. break;
  432. default: // If we did not match then
  433. Internal = &FilterChainDefunker::EatTag; // our mode is EatTag.
  434. break;
  435. }
  436. return GetInternal(); // Return the next byte.
  437. }
  438. unsigned char FilterChainDefunker::OpenAmp() { // While in OpenAmp state
  439. unsigned char x = GetStore(); // grab the next byte.
  440. if(tolower(x) == 'n') { // If it matched n then
  441. EnQueue(x); // push in the n -
  442. Internal = &FilterChainDefunker::MatchNBSP; // we are working on &nbsp;
  443. return GetInternal(); // return the next byte.
  444. } else
  445. if(tolower(x) == 'a') { // If it matched a then
  446. EnQueue(x); // push in the a -
  447. Internal = &FilterChainDefunker::SwitchAMPAPOS; // is it AMP or APOS?
  448. return GetInternal(); // return the next byte.
  449. } else
  450. if(tolower(x) == 'l') { // If it matched l then
  451. EnQueue(x); // push in the l -
  452. Internal = &FilterChainDefunker::MatchLT; // we are working on &lt;
  453. return GetInternal(); // return the next byte.
  454. } else
  455. if(tolower(x) == 'g') { // If it matched g then
  456. EnQueue(x); // push in the g -
  457. Internal = &FilterChainDefunker::MatchGT; // we are working on &gt;
  458. return GetInternal(); // return the next byte.
  459. } else
  460. if(tolower(x) == 'q') { // If it matched q then
  461. EnQueue(x); // push in the q -
  462. Internal = &FilterChainDefunker::MatchQUOT; // we are working on &quot;
  463. return GetInternal(); // return the next byte.
  464. } else
  465. if(x == '#') { // If it matched # then
  466. EnQueue(x); // push in the # -
  467. Internal = &FilterChainDefunker::DecodeNum; // we are working on &#...;
  468. return GetInternal(); // return the next byte.
  469. }
  470. Internal = &FilterChainDefunker::DeQueue; // If nothing matched then
  471. return GetInternal(); // punt and dequeue.
  472. }
  473. unsigned char FilterChainDefunker::MatchBR() { // If our mode is MatchBR
  474. if(MatchTagPattern(patMatchBR)) { // If we matched our pattern
  475. Internal = &FilterChainDefunker::DefunkRoot; // go to DefunkRoot state
  476. return ' '; // and return a space.
  477. } // If we did not match then
  478. Internal = &FilterChainDefunker::EatTag; // go to EatTag state and
  479. return GetInternal(); // return the next byte.
  480. }
  481. unsigned char FilterChainDefunker::MatchP() { // If our mode is MatchP
  482. if(MatchTagPattern(patMatchP)) { // if we matched our pattern
  483. Internal = &FilterChainDefunker::DefunkRoot; // go to DefunkRoot state
  484. return ' '; // and return a space.
  485. } // If we did not match then
  486. Internal = &FilterChainDefunker::EatTag; // go to EatTag state and
  487. return GetInternal(); // return the next byte.
  488. }
  489. unsigned char FilterChainDefunker::MatchNBSP() { // If our mode is MatchNBSP
  490. int pos = 2; // We've seen &n so far.
  491. while(patNBSP[pos]){ // Look through the pattern
  492. unsigned char x = GetStore(); // getting one byte at a time.
  493. EnQueue(x); // Push each into the queue.
  494. if(tolower(x)!=patNBSP[pos]) break; // If we fall off, get out.
  495. pos++; // otherwise keep going.
  496. }
  497. // At this point our pattern[pos] is either 0 (a match) or not.
  498. if(patNBSP[pos]) { // If we did not match then
  499. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  500. return GetInternal(); // and return the next byte.
  501. }
  502. // If we did match the pattern
  503. ClearQueue(); // then clear the queue and
  504. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  505. return ' '; // return a space.
  506. }
  507. unsigned char FilterChainDefunker::MatchLT() { // If our mode is MatchLT
  508. int pos = 2; // We've seen &l so far.
  509. while(patLT[pos]){ // Look through the pattern
  510. unsigned char x = GetStore(); // getting one byte at a time.
  511. EnQueue(x); // Push each into the queue.
  512. if(tolower(x)!=patLT[pos]) break; // If we fall off, get out.
  513. pos++; // otherwise keep going.
  514. }
  515. // At this point our pattern[pos] is either 0 (a match) or not.
  516. if(patLT[pos]) { // If we did not match then
  517. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  518. return GetInternal(); // and return the next byte.
  519. }
  520. // If we did match the pattern
  521. ClearQueue(); // then clear the queue and
  522. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  523. return '<'; // return a <.
  524. }
  525. unsigned char FilterChainDefunker::MatchGT() { // If our mode is MatchGT
  526. int pos = 2; // We've seen &g so far.
  527. while(patGT[pos]){ // Look through the pattern
  528. unsigned char x = GetStore(); // getting one byte at a time.
  529. EnQueue(x); // Push each into the queue.
  530. if(tolower(x)!=patGT[pos]) break; // If we fall off, get out.
  531. pos++; // otherwise keep going.
  532. }
  533. // At this point our pattern[pos] is either 0 (a match) or not.
  534. if(patGT[pos]) { // If we did not match then
  535. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  536. return GetInternal(); // and return the next byte.
  537. }
  538. // If we did match the pattern
  539. ClearQueue(); // then clear the queue and
  540. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  541. return '>'; // return a >.
  542. }
  543. unsigned char FilterChainDefunker::MatchQUOT() { // If our mode is MatchQUOT
  544. int pos = 2; // We've seen &q so far.
  545. while(patQUOT[pos]){ // Look through the pattern
  546. unsigned char x = GetStore(); // getting one byte at a time.
  547. EnQueue(x); // Push each into the queue.
  548. if(tolower(x)!=patQUOT[pos]) break; // If we fall off, get out.
  549. pos++; // otherwise keep going.
  550. }
  551. // At this point our pattern[pos] is either 0 (a match) or not.
  552. if(patQUOT[pos]) { // If we did not match then
  553. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  554. return GetInternal(); // and return the next byte.
  555. }
  556. // If we did match the pattern
  557. ClearQueue(); // then clear the queue and
  558. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  559. return '\"'; // return a quote.
  560. }
  561. unsigned char FilterChainDefunker::SwitchAMPAPOS() { // We are chosing AMP or APOS.
  562. unsigned char x = GetStore(); // Get the next byte.
  563. EnQueue(x); // Put it into the queue.
  564. if(tolower(x)=='m') { // If we matched m then we
  565. Internal = &FilterChainDefunker::MatchAMP; // are working on MatchAMP.
  566. return GetInternal(); // Go get it.
  567. } else
  568. if(tolower(x)=='p') { // If we matched p then we
  569. Internal = &FilterChainDefunker::MatchAPOS; // are working on MatchAPOS.
  570. return GetInternal(); // Go get it.
  571. }
  572. Internal = &FilterChainDefunker::DeQueue; // If we didn't match either
  573. return GetInternal(); // we punt and DeQueue.
  574. }
  575. unsigned char FilterChainDefunker::MatchAPOS() { // If our mode is MatchAPOS
  576. int pos = 3; // We've seen &ap so far.
  577. while(patAPOS[pos]){ // Look through the pattern
  578. unsigned char x = GetStore(); // getting one byte at a time.
  579. EnQueue(x); // Push each into the queue.
  580. if(tolower(x)!=patAPOS[pos]) break; // If we fall off, get out.
  581. pos++; // otherwise keep going.
  582. }
  583. // At this point our pattern[pos] is either 0 (a match) or not.
  584. if(patAMP[pos]) { // If we did not match then
  585. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  586. return GetInternal(); // and return the next byte.
  587. }
  588. // If we did match the pattern
  589. ClearQueue(); // then clear the queue and
  590. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  591. return '\''; // return an apostrophie.
  592. }
  593. unsigned char FilterChainDefunker::MatchAMP() { // If our mode is MatchAMP
  594. int pos = 3; // We've seen &am so far.
  595. while(patAMP[pos]){ // Look through the pattern
  596. unsigned char x = GetStore(); // getting one byte at a time.
  597. EnQueue(x); // Push each into the queue.
  598. if(tolower(x)!=patAMP[pos]) break; // If we fall off, get out.
  599. pos++; // otherwise keep going.
  600. }
  601. // At this point our pattern[pos] is either 0 (a match) or not.
  602. if(patAMP[pos]) { // If we did not match then
  603. Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue
  604. return GetInternal(); // and return the next byte.
  605. }
  606. // If we did match the pattern
  607. ClearQueue(); // then clear the queue and
  608. Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then
  609. return '&'; // return an amphersand.
  610. }
  611. unsigned char FilterChainDefunker::EatTag() { // If our mode is EatTag
  612. if(LastGetStore != '>') { // and our last byte was not
  613. while(GetStore()!='>')continue; // endtag then eat through
  614. } // the end tag. Then set our
  615. ReturnNothing = true; // ReturnNothing flag, set our
  616. Internal = &FilterChainDefunker::DefunkRoot; // mode to DefunkRoot and
  617. return 0; // return 0 (nothing, really).
  618. }
  619. unsigned char FilterChainDefunker::DecodeNum() { // If our mode is DecodeNum
  620. unsigned char NumBfr[5]; // A buffer for digits.
  621. memset(NumBfr,0,sizeof(NumBfr)); // Clear the buffer.
  622. for( // Let's read the number...
  623. unsigned int i=0; // NumBfr position = 0;
  624. i<(sizeof(NumBfr)-1) && // Stay well within the NunBfr.
  625. (EnQueue(NumBfr[i]=GetStore()), // Read and EnQueue each byte.
  626. isdigit(NumBfr[i])); // Keep going if it's a digit.
  627. i++)continue; // Move the buffer pointer.
  628. // Check for a proper finish...
  629. if(LastGetStore != ';') { // If we didn't end properly
  630. Internal = &FilterChainDefunker::DeQueue; // then we will punt and
  631. return GetInternal(); // DeQueue.
  632. }
  633. // At this point, NumBfr contains a c_str of the number to be decoded.
  634. // Also, the Qbfr has each byte we read in case we want to punt.
  635. int Decoded = atol((const char*)NumBfr); // Read the number.
  636. if(Decoded < 32 || Decoded > 255) { // If the number we read is
  637. Internal = &FilterChainDefunker::DeQueue; // out of range then we
  638. return GetInternal(); // punt and DeQueue.
  639. }
  640. // If we decoded a character
  641. ClearQueue(); // that is in range of normal
  642. Internal = &FilterChainDefunker::DefunkRoot; // ascii then clear the queue,
  643. return (unsigned char) Decoded; // go back to DefunkRoot, and
  644. } // return the decoded byte.
  645. /////////////////////////////////////////////////////////////////////////////////////////
  646. // FilterChainUrlDecode
  647. /////////////////////////////////////////////////////////////////////////////////////////
  648. unsigned char FilterChainUrlDecode::Bypass() { // In Bypass mode...
  649. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  650. if(c == '<') { // If it was '<' we begin.
  651. Internal = &FilterChainUrlDecode::Tag; // Go to Tag mode.
  652. AddToBfr(c); // Write the byte to our buffer.
  653. }
  654. return c; // Always return the byte.
  655. }
  656. unsigned char FilterChainUrlDecode::Tag() { // In Tag mode...
  657. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  658. if(tolower(c) == 'a') { // If we're in an anchor tag
  659. Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode.
  660. AddToBfr(c); // Write the byte to our buffer.
  661. } else
  662. if(tolower(c) == 'i') { // If we might be in an img tag
  663. Internal = &FilterChainUrlDecode::Img1; // Go to Img1 mode.
  664. AddToBfr(c); // Write the byte to our buffer.
  665. } else { // If we didn't match
  666. DecodeBfr[0] = 0; // we clear out the Decode
  667. DecodeBfr[1] = 0; // buffer. (Save some bytes by
  668. DecodeLength = 0; // doing it manually) Then we
  669. Internal = &FilterChainUrlDecode::Bypass; // Go to Bypass mode again.
  670. }
  671. return c; // Always return the byte.
  672. }
  673. unsigned char FilterChainUrlDecode::Img1() { // In Img1 mode...
  674. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  675. if(tolower(c)=='m') { // If we're still in an img tag
  676. Internal = &FilterChainUrlDecode::Img2; // Go to Img2 mode.
  677. AddToBfr(c); // Write the byte to our buffer.
  678. } else { // If we didn't match
  679. DecodeBfr[0] = 0; // we clear out the Decode
  680. DecodeBfr[1] = 0; // buffer and go back to
  681. DecodeBfr[2] = 0; // Bypass mode again.
  682. DecodeLength = 0;
  683. Internal = &FilterChainUrlDecode::Bypass;
  684. }
  685. return c; // Always return the byte.
  686. }
  687. unsigned char FilterChainUrlDecode::Img2() { // In Img2 mode...
  688. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  689. if(tolower(c)=='g') { // If we're still in an img tag
  690. Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode.
  691. AddToBfr(c); // Write the byte to our buffer.
  692. } else { // If we didn't match
  693. DecodeBfr[0] = 0; // we clear out the Decode
  694. DecodeBfr[1] = 0; // buffer and go back to
  695. DecodeBfr[2] = 0; // Bypass mode again.
  696. DecodeBfr[3] = 0;
  697. DecodeLength = 0;
  698. Internal = &FilterChainUrlDecode::Bypass;
  699. }
  700. return c; // Always return the byte.
  701. }
  702. unsigned char FilterChainUrlDecode::Root() { // While in Decode Root mode...
  703. unsigned char c = FilterChain::GetByte(); // Get the raw byte.
  704. AddToBfr(c); // Push it into the buffer.
  705. // Now we will switch modes based on the byte we get.
  706. if(c == '%') { // If we have '%' then it is
  707. Internal = &FilterChainUrlDecode::GetD1; // time to start decoding.
  708. } else
  709. if(c == '>') { // If we have '>' and
  710. if(DecodeFlag) { // we did some decoding then
  711. Internal = &FilterChainUrlDecode::Inject; // it is time to inject the result.
  712. } else { // If there was no decoding then
  713. Clear(); // we clear out our buffer and
  714. Internal = &FilterChainUrlDecode::Bypass; // it is time to go to sleep.
  715. }
  716. }
  717. // This next bit protects against malformed HTML by watching for any new tag
  718. // start. If one occurs, then we throw away our current decoding and assume a state
  719. // that starts with the new open "<".
  720. if(c == '<') { // If found a new < then we
  721. Clear(); // clear the buffer,
  722. AddToBfr(c); // Add the '<' back in, and
  723. Internal = &FilterChainUrlDecode::Tag; // go back to Tag mode.
  724. }
  725. return c; // Always return the byte.
  726. }
  727. unsigned char FilterChainUrlDecode::GetD1() { // Get the first digit.
  728. unsigned char c = FilterChain::GetByte(); // Read the raw byte.
  729. AddToBfr(c); // Add it to the buffer.
  730. Internal = &FilterChainUrlDecode::GetD2; // Move to GetD2 mode.
  731. return c; // Always return the byte.
  732. }
  733. // isHexDigit()
  734. // Returns true if i is a valid hex digit.
  735. bool FilterChainUrlDecode::isHexDigit(unsigned char i) {
  736. if(
  737. (i >= '0' && i <= '9') || // Hex digits must be 0-9 or
  738. (i >= 'A' && i <= 'F') || // A-F or
  739. (i >= 'a' && i <= 'f') // a-f if somebody used lower case.
  740. ) {
  741. return true; // If i is one of these we are true
  742. } else {
  743. return false; // IF i is not then we are false
  744. }
  745. }
  746. // convertHexDigit()
  747. // Returns an integer value for the hex digit i
  748. int FilterChainUrlDecode::convertHexDigit(unsigned char i) {
  749. if(i >= '0' && i <= '9') { // Digit chars convert directly.
  750. return i - '0';
  751. } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15
  752. return i - 'A' + 10;
  753. } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15
  754. return i - 'a' + 10;
  755. }
  756. return -1; // Return -1 if i was not a hex digit!
  757. }
  758. // convertHexByte()
  759. // Returns an integer value for a hex string representing a byte.
  760. unsigned char FilterChainUrlDecode::convertHexByte(unsigned char* x) {
  761. unsigned char working = convertHexDigit(x[1]); // Convert the low order nybl.
  762. working = working + (16 * convertHexDigit(x[0])); // Convert the high order nybl.
  763. return working; // Return the result.
  764. }
  765. unsigned char FilterChainUrlDecode::GetD2() { // Get the second digit.
  766. unsigned char c = FilterChain::GetByte(); // Read the raw byte.
  767. AddToBfr(c); // Add it to the buffer.
  768. // At this point the end of our DecodeBfr has a c_str of a small hex integer (we hope)
  769. // that we can decode. If we successfully decode it then we will replace %xx in our
  770. // DecodeBfr with the character that is represented by that byte.
  771. // Do we really have an encoded byte to decode?
  772. int codepos = DecodeLength-3; // Grab the position of the hex.
  773. if(
  774. DecodeBfr[codepos]=='%' && // If the first char is %
  775. isHexDigit(DecodeBfr[codepos+1]) && // and the second is a hex digit
  776. isHexDigit(DecodeBfr[codepos+2]) // and the third is a hex digit
  777. ){ // then we can decode the string.
  778. unsigned char q = convertHexByte(DecodeBfr+codepos+1); // Decode the byte.
  779. if(q >= 32) { // If the byte is in range then
  780. DecodeBfr[codepos] = q; // Replace the % with the byte
  781. DecodeBfr[--DecodeLength] = 0; // backup over and erase the hex
  782. DecodeBfr[--DecodeLength] = 0; // digits themselves.
  783. DecodeFlag = true; // Set the decode flag.
  784. }
  785. // If we decided the byte was not decodable for some reason then the original data
  786. // remains in the buffer as it was originally read.
  787. }
  788. Internal = &FilterChainUrlDecode::Root; // Get ready to decode more.
  789. return c; // Always return the byte.
  790. }
  791. unsigned char FilterChainUrlDecode::Inject() { // Inject the decoded result.
  792. if(
  793. DecodeBfr[DecodePosition] && // If we've got more bytes
  794. DecodePosition < sizeof(DecodeBfr)) { // and we're safely in our buffer
  795. return DecodeBfr[DecodePosition++]; // then return the byte and move
  796. } // ahead.
  797. // Once the buffer is empty we
  798. Clear(); // clear out the system, and go
  799. Internal = &FilterChainUrlDecode::Bypass; // back to bypass mode. Then
  800. return GetByte(); // return the next bypassed byte.
  801. }
  802. ////////////////////////////////////////////////////////////////////////////////
  803. // FilterChainHeaderAnalysis
  804. ////////////////////////////////////////////////////////////////////////////////
  805. int FilterChainHeaderAnalysis::FollowPattern(char c) { // Follow the pattern.
  806. c = tolower(c); // Convert c to lower case.
  807. if(c != MatchPattern[MatchIndex]) { // If c doesn't match the pattern
  808. return -1; // then return -1 indicating we fell off.
  809. } else { // If it did match the pattern then
  810. MatchIndex++; // move ahead to the next byte and
  811. if(0 == MatchPattern[MatchIndex]) { // take a look. If that's all there was
  812. return 0; // then we've finished :-)
  813. }
  814. } // If we matched and there's more to do
  815. return 1; // then we return 1.
  816. }
  817. unsigned char FilterChainHeaderAnalysis::doSeekNL() { // Looking for a new line.
  818. unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits)
  819. if('\n' == c) { // If it was a new line then
  820. Mode = &FilterChainHeaderAnalysis::doSeekDispatch; // move on to the next mode
  821. } // for the next byte and
  822. return c; // return the byte we got.
  823. }
  824. unsigned char FilterChainHeaderAnalysis::doSeekDispatch() { // Looking at the first char after NL.
  825. unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits)
  826. switch(tolower(c)) { // Switch modes based on what this byte is.
  827. case '\n': { // If it is a New Line then the headers are
  828. Mode = &FilterChainHeaderAnalysis::doEndOfHeaders; // finished - so we set up our EndOfHeaders
  829. return GetByte(); // mode and return the next byte from there.
  830. break; // The extra NL will be emitted at the end.
  831. }
  832. case 'r': { // If it is an R as in (R)eceived:
  833. SetFollowPattern("eceived:"); // establish the follow pattern and
  834. Mode = &FilterChainHeaderAnalysis::doReceived; // switch to doReceived mode.
  835. break;
  836. }
  837. case 'f': { // If it is an F as in (F)rom:
  838. SetFollowPattern("rom:"); // establish the follow pattern and
  839. Mode = &FilterChainHeaderAnalysis::doFrom; // switch to doFrom mode.
  840. break;
  841. }
  842. case 't': { // If it is an T as in (T)o:
  843. SetFollowPattern("o:"); // establish the follow pattern and
  844. Mode = &FilterChainHeaderAnalysis::doTo; // switch to doTo mode.
  845. break;
  846. }
  847. case 'c': { // If it is a C as in (C)C:
  848. SetFollowPattern("c:"); // establish the follow pattern and
  849. Mode = &FilterChainHeaderAnalysis::doCC; // switch to doCC mode.
  850. break;
  851. }
  852. case 'm': { // If it is an M as in (M)essage-id:
  853. SetFollowPattern("essage-id:"); // establish the follow pattern and
  854. Mode = &FilterChainHeaderAnalysis::doMessageID; // switch to doMessageID mode.
  855. break;
  856. }
  857. case 'd': { // If it is a D as in (D)ate:
  858. SetFollowPattern("ate:"); // establish the follow pattern and
  859. Mode = &FilterChainHeaderAnalysis::doDate; // switch to doDate mode.
  860. break;
  861. }
  862. case 's': { // If it is an S as in (S)ubject:
  863. SetFollowPattern("ubject:"); // establish the follow pattern and
  864. Mode = &FilterChainHeaderAnalysis::doSubject; // switch to doSubject mode.
  865. break;
  866. }
  867. default: { // If we don't recognize the byte then
  868. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for a new line.
  869. break;
  870. }
  871. } // Once all of our mode switching is handled
  872. return c; // we return the byte we got.
  873. }
  874. unsigned char FilterChainHeaderAnalysis::doReceived() { // Identifying a Received: header.
  875. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  876. switch(FollowPattern(c)) { // See if we're still on the path.
  877. case -1: { // If we're not on the right tag then
  878. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  879. break;
  880. }
  881. case 0: { // If we've found the end of our tag (match!)
  882. Mode = &FilterChainHeaderAnalysis::doFindIP; // start looking for the IP.
  883. IPToTest = ""; // Clear the IPToTest buffer.
  884. break;
  885. }
  886. default: { // If we're still following along then
  887. break; // keep on keepin' on.
  888. }
  889. } // Once we know what we're doing we
  890. return c; // return the character we got.
  891. }
  892. unsigned char FilterChainHeaderAnalysis::doFindIP() { // Seeking the [IP] in a Received header.
  893. unsigned char c = GetCheckedByte(); // Get a checked byte.
  894. switch(c) {
  895. case '[': { // If we find the [ then
  896. Mode = &FilterChainHeaderAnalysis::doTestIP; // set up to grab and test the IP.
  897. break;
  898. }
  899. case '\n': { // If we come across a newline then
  900. Mode = &FilterChainHeaderAnalysis::doSeekNL; // we must be lost so go back to basics.
  901. break;
  902. }
  903. default: { // For anything else we keep on going.
  904. break;
  905. }
  906. }
  907. return c; // Return the byte.
  908. }
  909. //// 20070614 _M Improved IP exctaction from received headers so that if the
  910. //// apparent IP contains any unusual bytes (not digits or dots) then the
  911. //// attempt is abandoned.
  912. unsigned char FilterChainHeaderAnalysis::doTestIP() { // Gets and tests the [IP].
  913. unsigned char c = FilterChain::GetByte(); // Get the next byte.
  914. switch(c) {
  915. case ']': { // If we come to ] we've got it!
  916. IPTester.test(IPToTest, IPTestResult); // Do the test with this IP.
  917. if(0 == IPTestResult.length()) { // If the IP test wants us to truncate
  918. throw Empty("FilterChainHeaderAnalysis: Truncate"); // the message then throw Empty!
  919. } // Otherwise, proceed as per normal...
  920. SetOutputBuffer(IPTestResult); // Put the result in the output buffer.
  921. Mode = &FilterChainHeaderAnalysis::doInjectIPTestResult; // Set the mode to inject the result.
  922. break; // That will start on the next byte.
  923. }
  924. case '0': // IPs are made of digits and dots.
  925. case '1':
  926. case '2':
  927. case '3':
  928. case '4':
  929. case '5':
  930. case '6':
  931. case '7':
  932. case '8':
  933. case '9':
  934. case '.': { // Capture the IP between [ and ]
  935. IPToTest += c; // one byte at a time.
  936. break;
  937. }
  938. default: { // If we find anything else we must be
  939. Mode = &FilterChainHeaderAnalysis::doSeekNL; // lost so we go back to the basics.
  940. break;
  941. }
  942. }
  943. return c;
  944. }
  945. unsigned char FilterChainHeaderAnalysis::doFrom() { // Identifying a From: header.
  946. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  947. switch(FollowPattern(c)) { // See if we're still on the path.
  948. case -1: { // If we're not on the right tag then
  949. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  950. break;
  951. }
  952. case 0: { // If we've found the end of our tag (match!)
  953. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  954. FoundFrom = true; // record that this tag was present.
  955. break;
  956. }
  957. default: { // If we're still following along then
  958. break; // keep on keepin' on.
  959. }
  960. } // Once we know what we're doing we
  961. return c; // return the character we got.
  962. }
  963. unsigned char FilterChainHeaderAnalysis::doTo() { // Identifying a To: header.
  964. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  965. switch(FollowPattern(c)) { // See if we're still on the path.
  966. case -1: { // If we're not on the right tag then
  967. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  968. break;
  969. }
  970. case 0: { // If we've found the end of our tag (match!)
  971. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  972. FoundTo = true; // record that this tag was present.
  973. break;
  974. }
  975. default: { // If we're still following along then
  976. break; // keep on keepin' on.
  977. }
  978. } // Once we know what we're doing we
  979. return c; // return the character we got.
  980. }
  981. unsigned char FilterChainHeaderAnalysis::doCC() { // Identifying a CC: header.
  982. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  983. switch(FollowPattern(c)) { // See if we're still on the path.
  984. case -1: { // If we're not on the right tag then
  985. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  986. break;
  987. }
  988. case 0: { // If we've found the end of our tag (match!)
  989. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  990. FoundCC = true; // record that this tag was present.
  991. break;
  992. }
  993. default: { // If we're still following along then
  994. break; // keep on keepin' on.
  995. }
  996. } // Once we know what we're doing we
  997. return c; // return the character we got.
  998. }
  999. unsigned char FilterChainHeaderAnalysis::doMessageID() { // Identifying a MessageID header.
  1000. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  1001. switch(FollowPattern(c)) { // See if we're still on the path.
  1002. case -1: { // If we're not on the right tag then
  1003. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  1004. break;
  1005. }
  1006. case 0: { // If we've found the end of our tag (match!)
  1007. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  1008. FoundMessageID = true; // record that this tag was present.
  1009. break;
  1010. }
  1011. default: { // If we're still following along then
  1012. break; // keep on keepin' on.
  1013. }
  1014. } // Once we know what we're doing we
  1015. return c; // return the character we got.
  1016. }
  1017. unsigned char FilterChainHeaderAnalysis::doDate() { // Identifying a Date: header.
  1018. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  1019. switch(FollowPattern(c)) { // See if we're still on the path.
  1020. case -1: { // If we're not on the right tag then
  1021. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  1022. break;
  1023. }
  1024. case 0: { // If we've found the end of our tag (match!)
  1025. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  1026. FoundDate = true; // record that this tag was present.
  1027. break;
  1028. }
  1029. default: { // If we're still following along then
  1030. break; // keep on keepin' on.
  1031. }
  1032. } // Once we know what we're doing we
  1033. return c; // return the character we got.
  1034. }
  1035. unsigned char FilterChainHeaderAnalysis::doSubject() { // Identifying a Subject: header.
  1036. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag.
  1037. switch(FollowPattern(c)) { // See if we're still on the path.
  1038. case -1: { // If we're not on the right tag then
  1039. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one.
  1040. break;
  1041. }
  1042. case 0: { // If we've found the end of our tag (match!)
  1043. Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and
  1044. FoundSubject = true; // record that this tag was present.
  1045. break;
  1046. }
  1047. default: { // If we're still following along then
  1048. break; // keep on keepin' on.
  1049. }
  1050. } // Once we know what we're doing we
  1051. return c; // return the character we got.
  1052. }
  1053. unsigned char FilterChainHeaderAnalysis::doEndOfHeaders() { // IdentifyEndOfHeaders & Emit Results.
  1054. // We know we've reached the end of the headers so now
  1055. // we have to formulate the results we want to inject and
  1056. // er... inject them.
  1057. EndOfHeaderResults = "X-SNFHDR: "; // Emit an X header (internal only)
  1058. if(MissingCC()) { EndOfHeaderResults.append("-CC "); } // Emit -CC if no CC header.
  1059. if(MissingTo()) { EndOfHeaderResults.append("-TO "); } // Emit -TO if no TO header (together no to)
  1060. if(MissingFrom()) { EndOfHeaderResults.append("-FROM "); } // Emit -FROM if no FROM header.
  1061. if(MissingDate()) { EndOfHeaderResults.append("-DATE "); } // Emit -DATE if no DATE header.
  1062. if(MissingMessageID()) { EndOfHeaderResults.append("-MESSAGEID "); } // Emit -MESSAGEID if no MESSAGE-ID header.
  1063. if(MissingSubject()) { EndOfHeaderResults.append("-SUBJECT "); } // Emit -SUBJECT if no SUBJECT header.
  1064. if(HighBitCharacters()) { EndOfHeaderResults.append("+HIGHBIT"); } // Emit +HIGHBIT if non-ascii chars present.
  1065. EndOfHeaderResults.append("\n\n"); // Emit the double newline - end of headers.
  1066. SetOutputBuffer(EndOfHeaderResults); // Setup the output string.
  1067. Mode = &FilterChainHeaderAnalysis::doInjectAnalysis; // Switch to the output injection mode.
  1068. return GetByte(); // Return the first byte from there :-)
  1069. }
  1070. void FilterChainHeaderAnalysis::SetOutputBuffer(string& s) { // Setup the OutputBuffer.
  1071. OutputBuffer = (char*) s.c_str(); OutputIndex = 0; // Capture the c_str and reset the index.
  1072. }
  1073. unsigned char FilterChainHeaderAnalysis::doInjectIPTestResult() { // Inject OutputBuffer and go to doSeekNL.
  1074. unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer.
  1075. if(0 == c) { // If it is the null terminator then we
  1076. Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to seeking lines and return that
  1077. return GetByte(); // byte instead.
  1078. } // If we have a normal byte then we
  1079. return c; // return it.
  1080. }
  1081. unsigned char FilterChainHeaderAnalysis::doInjectAnalysis() { // Inject OutputBuffer and go to doOff.
  1082. unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer.
  1083. if(0 == c) { // If it is the null terminator then we
  1084. Mode = &FilterChainHeaderAnalysis::doOff; // go back to seeking lines and return that
  1085. return GetByte(); // byte instead.
  1086. } // If we have a normal byte then we
  1087. return c; // return it.
  1088. }