Code Snippets ---
This page is to be an assemblage of pieces and parts that the author has constructed for various projects ...
It's nice to have a place to collect half-thoughts and potential code bits for future (re)use, and this page serves that purpose.
Some of these will be junkers, some not ...
Probably lots of Python, C/C++, and not much of anything else ...
All junkyard bits with no guarantee ...
Snippet #1: A simple use of Python lists (Python lists rock!) to extract some mailing-list entries from the Four-Calorie-Mail daemon raw-data output stream ...
# Collect, from every message in the raw stream, the text that follows a
# subject marker. The dots in both patterns are regex wildcards, so for
# example "Subject: Re:" matches "Subject. Re.".
# NOTE(review): strAllLines (the input text) and strMessages (the string
# accumulator) are not defined in this snippet -- presumably the surrounding
# script sets them up; confirm before reuse.
# re.split always returns a list (never None), so no None guards are needed.
lstMessages = re.split("Beginning.Of.Message", strAllLines)
for strMessage in lstMessages:
    lstCheck = re.split("Subject. Re.", strMessage)
    # Piece 0 is the text before the first marker (the whole message when
    # there is no marker at all), so only the later pieces are kept.
    strMessages += "".join(lstCheck[1:])
Snippet #2: Simple parser for OS statistics (Python lists rock!) for Fourcalorieservers statistics page
def getOpSysID(self, strScan, curPgOpSys):
    """Return the ID of the first known operating system named in strScan.

    Dynamic OS detail getter (see getSysString for hard coded version).
    Every row of tblOperatingSystems is read through the supplied cursor
    and its name (second column) is searched for, case-insensitively and
    ignoring trailing whitespace, inside strScan.

    Args:
        strScan: Text to search for an operating system name.
        curPgOpSys: Open database cursor providing execute() and fetchall().

    Returns:
        The first column (ID) of the first matching row, or 1 (Unknown)
        when no row matches.
    """
    strScan = strScan.lower()
    strSQLOpSystems = u"select cnpkoperatingsystems, caoperatingsystem from tblOperatingSystems;"
    curPgOpSys.execute(strSQLOpSystems)
    # "or ()" keeps the loop safe if the driver returns None for no rows.
    for row in curPgOpSys.fetchall() or ():
        strTest = (row[1] or "").lower().rstrip()
        # A blank or NULL name would be a substring of every scan string
        # and shadow all later rows, so such rows are skipped.
        if strTest and strTest in strScan:
            return row[0]
    # 1 = default (Unknown)
    return 1
Snippet #3: A simple use of Python to parse search engine URLs
#!/usr/bin/env python
# built in python modules we will use
import sys
import base64
import os
import string
import time
import random
import re
import pgdb
# import DB2
from socket import *
import select
from HTMLParser import HTMLParser
class THTMLParser(HTMLParser):
    """HTML parser that records the attributes of every <a> start tag.

    Each recorded entry is a tuple (strRowIP, strRowDomain, attrs), where
    attrs is the attribute list the base parser passes to handle_starttag.
    Callers set strRowIP / strRowDomain before feeding a page so the links
    found are tagged with the row they came from.
    """

    # Class-level defaults; callers overwrite these per row on the instance.
    strRowIP = ""
    strRowDomain = ""

    def __init__(self, *args, **kwargs):
        # The result list must be per-instance: a class-level list would be
        # shared, so every parser would see every other parser's links.
        self.lstUrls = []
        HTMLParser.__init__(self, *args, **kwargs)

    def handle_starttag(self, tag, attrs):
        """Record (IP, domain, attrs) when the tag is an anchor."""
        if tag == 'a':
            self.lstUrls.append((self.strRowIP, self.strRowDomain, attrs))
class ZorbSearch():
    """Extracts link data from the page chunks stored in the zorb1 table."""

    def extractUrls(self, objHTMLParser, strRawData, strRowIP, strRowDomain):
        """Feed one row's raw HTML to the parser.

        The row's IP and domain are stored on the parser first so that the
        links it records are tagged with their origin.

        Returns:
            0 when the parser accepted the data, 1 when it raised.
        """
        objHTMLParser.strRowIP = strRowIP
        objHTMLParser.strRowDomain = strRowDomain
        try:
            objHTMLParser.feed(strRawData)
        except Exception:
            # Deliberately broad: any parser failure is reported as a
            # status code instead of propagating.
            return 1
        return 0

    def mainFunc(self):
        """Parse every stored page chunk and check the collected links.

        Uses the module-level cursor curPgSQL and the THTMLParser class,
        both of which must exist before this is called.

        NOTE(review): the published snippet lost its indentation; the
        status codes below follow the most direct reading of its if/else
        chain -- confirm against the original script.

        Returns:
            0 -- success (also when fetchall() returns None)
            1 -- a collected link entry could not be inspected
            2 -- the pages parsed but contained no links
            3 -- the parser raised on one of the rows
            4 -- the query returned no rows
        """
        objHTMLParser = THTMLParser()
        strSQL = u"select cafirstchunk, caipaddress, cadomain from zorb1 "
        curPgSQL.execute(strSQL)
        lstRows = curPgSQL.fetchall()
        if lstRows is None:
            return 0
        if len(lstRows) == 0:
            return 4

        for row in lstRows:
            strRawData = row[0]
            strRowIP = row[1].rstrip()
            strRowDomain = row[2]
            if self.extractUrls(objHTMLParser, strRawData, strRowIP, strRowDomain) == 1:
                objHTMLParser.reset()
                # the HtmlParser tends to throw exceptions on badly formed html,
                # so in most cases stopping at the first bad row will be removed
                return 3

        if len(objHTMLParser.lstUrls) == 0:
            return 2

        for item in objHTMLParser.lstUrls:
            # The first and second items in the tuple are IP and Domain.
            # The third item is the parser's attribute data for the link.
            # TODO: there is some ambiguity from the parser, so a helper
            # should test the parsed values to determine where the url
            # sits in the tuple (based on length of tuple, etc).
            try:
                # Placeholder check; the real extraction would be along
                # the lines of str(item[2][1][1]).
                len(item)
            except Exception:
                return 1
        return 0
# # # # # # # # # # # # # # # # # # # # # # #
# Create connections and initial objects #
# # # # # # # # # # # # # # # # # # # # # # #
# Script driver: open the database, run the search, report the outcome.
# NOTE(review): the connection credentials are hard-coded in the source;
# consider reading them from the environment or a configuration file.
connPgSQL = None
curPgSQL = None
try:
    connPgSQL = pgdb.connect(database='zorbator', host='localhost', password='masterzorb', user='littlezorb')
    if connPgSQL is not None:
        # ZorbSearch.mainFunc reads this module-level cursor.
        curPgSQL = connPgSQL.cursor()
        objSearch = ZorbSearch()
        intReturn = objSearch.mainFunc()
        if intReturn != 0:
            print("Error")
        else:
            print("Done")
    else:
        print("Unable to create database connection object")
except Exception as e:
    # Two spaces after the colon reproduce the output of the original
    # Python 2 statement: print "Exception: ",e
    print("Exception:  %s" % (e,))
finally:
    # Release the cursor and connection even when the search raised.
    try:
        if curPgSQL is not None:
            curPgSQL.close()
    finally:
        if connPgSQL is not None:
            connPgSQL.close()
Hopefully the foregoing is clear: I have previously accumulated rows in a database (PostgreSQL, of course), and
each row contains an IP address, a domain name, and a chunk of data extracted from the index.html page of the website
that is associated with the IP/Domain.
The routine goes through the chunk of data to extract more URLs. A recursive use of the collected URLs creates a portion
of the functionality needed for a search engine. The linked "spider data" may then be rolled into a hierarchical view in
the zorbator database.
Snippet #4: A simple use of Python to resolve an IP address to a domain name
#!/usr/bin/env python
# built in python modules we will use
import sys
import base64
import os
import string
import time
import random
import re
import pgdb
# import DB2
from socket import *
import select
def resolveIPtoDomain(self, strRowIP):
    """Reverse-resolve an IP address to a host name.

    Args:
        strRowIP: The address to look up.

    Returns:
        The primary host name as a string, or strRowIP unchanged when the
        lookup fails for any reason.
    """
    try:
        # gethostbyaddr yields (hostname, aliaslist, ipaddrlist); only the
        # primary host name is of interest here.
        strHostName, _lstAliases, _lstAddresses = gethostbyaddr(strRowIP)
        return str(strHostName)
    except Exception:
        # Best effort: hand the caller back what it passed in.
        return strRowIP
Snippet #5: A simple use of Python to resolve a domain name to an IP address
#!/usr/bin/env python
# built in python modules we will use
import sys
import base64
import os
import string
import time
import random
import re
import pgdb
# import DB2
from socket import *
import select
from urlparse import urlparse
def getAddressForName(self, strDName):
    """Resolve the host named in a URL (or a bare host name) to an IP address.

    Args:
        strDName: A URL such as "http://www.example.com/index.html", or a
            bare host name such as "www.example.com".

    Returns:
        The address returned by gethostbyname for the host, or '' when no
        host can be extracted or the lookup fails.
    """
    try:
        # urlparse only recognises a host after "//"; without it a bare
        # name such as "www.example.com" would be parsed as a path.
        if "://" not in strDName and not strDName.startswith("//"):
            strDName = "//" + strDName
        # The host is the network-location part of the URL (element 0 of
        # the parse result is the scheme, e.g. "http"). .hostname also
        # strips any user-info and port.
        strDomain = urlparse(strDName).hostname
        if not strDomain:
            return ''
        return gethostbyname(strDomain)
    except Exception:
        # Best effort: any parsing or lookup failure yields ''.
        return ''
Attributions:
For information about Python see http://www.python.org ---- Note that this site is not
affiliated in any way ...
Content copyright © Datazygte, Inc 2009 -- Contributions by Ron Scheckelhoff