import os, fnmatch, hashlib, sqlite3, socket, datetime, time

#############################################################################################################
#
# File Scanner. Copyright (C) 2008 by idlethreat (tgiles at gmail dot com). All rights reserved.
# This program is released under Creative Commons by-nc-sa. http://creativecommons.org/licenses/by-nc-sa/3.0/us/
# Read the license. Know your rights.
#
# This script iterates over all files on a system (or in a directory- check out the variables), calculates a sha1 hash, and injects the
# value into a local sqlite 3 database. I was inspired somewhat by security applications like Tripwire, which effectively perform this
# to check for file system changes on a computer.
#
# While not tested, this code should compile and work on either a Windows or Linux system with no changes.
# Developed on OSX 10.4.8 with Python 2.5.2.
#
#############################################################################################################

# variables!

databasefile = 'database.db'  # name of the database we'll be saving things to. This is in the current directory.
wheretoscan = '/home/foo'  # top directory where scans will be performed. Change this.


# Determine my local hostname.
myhostname = socket.gethostname()
# Determine my local IP address by resolving that hostname.
try:
    myipaddress = socket.gethostbyname(myhostname)
except socket.error:
    # The hostname does not resolve on this machine (no DNS / hosts entry).
    # Record the loopback address instead of aborting the whole scan.
    myipaddress = '127.0.0.1'

# Make sure the local SQLite3 DB file and its 'system' table exist.
# sqlite3.connect() creates the file when it is missing, and
# "create table if not exists" is a no-op when the table is already there,
# so this is safe to run on every start (including against an empty file).

# True when the database file was already present before this run.
# Kept as a module-level name; the table creation below no longer depends on it.
isfile = os.path.isfile(databasefile)

conn = sqlite3.connect(databasefile)
try:
    c = conn.cursor()
    c.execute('''create table if not exists system (timescanned text, myhostname text, myipaddress text, path text, sha text)''')
    conn.commit()
    c.close()
finally:
    # Release the file handle; each later insert opens its own connection.
    conn.close()

def _sha1_of_file(filepath, chunksize=65536):
    """Return the hexadecimal SHA-1 digest of the file at *filepath*.

    The file is opened in binary mode so the bytes are hashed exactly as
    stored, and it is read *chunksize* bytes at a time so a very large file
    never has to fit in memory.
    """
    digest = hashlib.sha1()
    with open(filepath, 'rb') as handle:
        while True:
            chunk = handle.read(chunksize)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()


def scanner(toscan):
    """Hash one file and append the result to the ``system`` table.

    One row is inserted into the SQLite database named by the module-level
    ``databasefile``: the scan time (epoch seconds, stored as text), the
    module-level ``myhostname`` and ``myipaddress``, the path *toscan*, and
    the file's SHA-1 hex digest.

    toscan -- path of the file to hash.

    Returns None. A file that cannot be read, or a row that cannot be
    written, is skipped: a one-line message goes to stderr and the function
    returns normally so the caller can move on to the next file.
    """
    import sys  # local import: only needed for the skip message below

    try:
        # Devices, FIFOs and broken links are not hashable content; reading
        # them could block or never end, so treat them like unreadable files.
        if not os.path.isfile(toscan):
            raise IOError("not a regular file")

        myhash = _sha1_of_file(toscan)

        # Epoch time at which the file was scanned. timetuple() carries no
        # microseconds, so this is a whole number of seconds (as a float).
        timescanned = time.mktime(datetime.datetime.now().timetuple())

        # a debugging print statement if you want to see what's going on.
        # print(timescanned, myhostname, myipaddress, toscan, myhash)

        conn = sqlite3.connect(databasefile)
        try:
            # Bound parameters instead of string concatenation: paths that
            # contain quotes are stored verbatim and cannot alter the SQL.
            conn.execute(
                "insert into system values (?, ?, ?, ?, ?)",
                (str(timescanned), myhostname, myipaddress, toscan, myhash))
            # Save (commit) the change before moving to the next item.
            conn.commit()
        finally:
            conn.close()
    except (IOError, OSError, sqlite3.Error, UnicodeError) as err:
        # Best effort: report the file that could not be scanned and carry on.
        sys.stderr.write("scanner: skipped %r: %s\n" % (toscan, err))
    

# Walk every directory below 'wheretoscan' and hand each file, as an absolute
# path, to scanner(), which gathers the information and puts it into the
# database. Every file name is passed on, including names without a dot
# (e.g. 'Makefile' or most Unix executables), so the whole tree is covered.
# In a perfect world there would be separate insertintodatabase() and
# whattimeisit() functions as well. TOFIX
for path, dirs, files in os.walk(wheretoscan):
    for filename in files:
        scanner(os.path.abspath(os.path.join(path, filename)))
