Wednesday, March 30, 2011
PyOpenCV - Access OpenCV Through Python!
OpenCV is an open source computer vision library written in C/C++. Here is the Python module for it.
Tuesday, March 29, 2011
Sunday, March 27, 2011
Some RPy Examples
Friday, March 25, 2011
Installing Packages in R, using Ubuntu
R... is the unintuitive statistics "programming language". I'm being overly disdainful; just because you don't like something doesn't mean it isn't useful. It has a fractal dimension package, fdim, which you can read about on CRAN.
I found this useful in getting fdim up and running, even though it has deprecated dependencies and everything.
I found this useful in getting fdim up and running, even though it has deprecated dependencies and everything.
Wednesday, March 23, 2011
Tuesday, March 22, 2011
Thursday, March 17, 2011
Example of using threading using Python
#!/usr/bin/env python
import Queue
import threading
import mahotas
import time
import sys
# this is the list where
# the final data goes
f = list()
# create a Queue
queue = Queue.Queue()
# the code that needs to be executed
# we override the init function in order to pass parameters to the threads
class MyThread( threading.Thread ):
def __init__( self, f, queue ):
self.f = f
self.queue = queue
threading.Thread.__init__( self )
def run( self ):
while True:
task = self.queue.get()
self.f.append( mahotas.features.haralick( task ) )
self.queue.task_done()
# reads the image and converts it to a numpy int ndarray
img = mahotas.imread( sys.argv[1] )
# chip size dimensions
chipx = int( sys.argv[2] )
chipy = int( sys.argv[3] )
# figure out image size
imgx, imgy, imgz = img.shape
# the number of chips in each direction
nbxchips = imgx/chipx
nbychips = imgy/chipy
start = time.time()
# create some threads
for i in range( 3 ):
t = MyThread( f, queue )
t.setDaemon( True )
t.start()
# put chips into the queue
for i in range( nbxchips ):
for j in range( nbychips ):
queue.put( img[i*chipx:(i+1)*chipx,j*chipy:(j+1)*chipy,:] )
# wait for all the work to finish
queue.join()
print 'Elapsed Time: %s' % ( time.time() - start )
print len( f )
import Queue
import threading
import mahotas
import time
import sys
# this is the list where
# the final data goes
f = list()
# create a Queue
queue = Queue.Queue()
# the code that needs to be executed
# we override the init function in order to pass parameters to the threads
class MyThread( threading.Thread ):
def __init__( self, f, queue ):
self.f = f
self.queue = queue
threading.Thread.__init__( self )
def run( self ):
while True:
task = self.queue.get()
self.f.append( mahotas.features.haralick( task ) )
self.queue.task_done()
# reads the image and converts it to a numpy int ndarray
img = mahotas.imread( sys.argv[1] )
# chip size dimensions
chipx = int( sys.argv[2] )
chipy = int( sys.argv[3] )
# figure out image size
imgx, imgy, imgz = img.shape
# the number of chips in each direction
nbxchips = imgx/chipx
nbychips = imgy/chipy
start = time.time()
# create some threads
for i in range( 3 ):
t = MyThread( f, queue )
t.setDaemon( True )
t.start()
# put chips into the queue
for i in range( nbxchips ):
for j in range( nbychips ):
queue.put( img[i*chipx:(i+1)*chipx,j*chipy:(j+1)*chipy,:] )
# wait for all the work to finish
queue.join()
print 'Elapsed Time: %s' % ( time.time() - start )
print len( f )
Tuesday, March 15, 2011
Haralick Features, and other features, too!
I found this site that describes a Python module that calculates Haralick textures, SURF features, and others.
Notable algorithms:
Notable algorithms:
- watershed.
- convex points calculations.
- hit & miss. thinning.
- Zernike & Haralick, LBP, and TAS features.
- freeimage based numpy image loading (requires freeimage libraries to be installed).
- Speeded-Up Robust Features (SURF), a form of local features.
- thresholding.
- convolution.
- Sobel edge detection
Python PCA (ii)
# Code from Chapter 10 of Machine Learning: An Algorithmic Perspective
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)
# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
# Stephen Marsland, 2008
# An algorithm to compute PCA. Not as fast as the NumPy implementation
from pylab import *
from numpy import *
def pca(data,nRedDim=0,normalise=1):
# Centre data
m = mean(data,axis=0)
data -= m
# Covariance matrix
C = cov(transpose(data))
# Compute eigenvalues and sort into descending order
evals,evecs = linalg.eig(C)
indices = argsort(evals)
indices = indices[::-1]
evecs = evecs[:,indices]
evals = evals[indices]
if nRedDim>0:
evecs = evecs[:,:nRedDim]
if normalise:
for i in range(shape(evecs)[1]):
evecs[:,i] / linalg.norm(evecs[:,i]) * sqrt(evals[i])
# Produce the new data matrix
x = dot(transpose(evecs),transpose(data))
# Compute the original data again
y=transpose(dot(evecs,x))+m
return x,y,evals,evecs
# by Stephen Marsland (http://seat.massey.ac.nz/personal/s.r.marsland/MLBook.html)
# You are free to use, change, or redistribute the code in any way you wish for
# non-commercial purposes, but please maintain the name of the original author.
# This code comes with no warranty of any kind.
# Stephen Marsland, 2008
# An algorithm to compute PCA. Not as fast as the NumPy implementation
from pylab import *
from numpy import *
def pca(data,nRedDim=0,normalise=1):
# Centre data
m = mean(data,axis=0)
data -= m
# Covariance matrix
C = cov(transpose(data))
# Compute eigenvalues and sort into descending order
evals,evecs = linalg.eig(C)
indices = argsort(evals)
indices = indices[::-1]
evecs = evecs[:,indices]
evals = evals[indices]
if nRedDim>0:
evecs = evecs[:,:nRedDim]
if normalise:
for i in range(shape(evecs)[1]):
evecs[:,i] / linalg.norm(evecs[:,i]) * sqrt(evals[i])
# Produce the new data matrix
x = dot(transpose(evecs),transpose(data))
# Compute the original data again
y=transpose(dot(evecs,x))+m
return x,y,evals,evecs
Python PCA (i)
#!/usr/bin/env python
""" a small class for Principal Component Analysis
Usage:
p = PCA( A, fraction=0.90 )
In:
A: an array of e.g. 1000 observations x 20 variables, 1000 rows x 20 columns
fraction: use principal components that account for e.g.
90 % of the total variance
Out:
p.U, p.d, p.Vt: from numpy.linalg.svd, A = U . d . Vt
p.dinv: 1/d or 0, see NR
p.eigen: the eigenvalues of A*A, in decreasing order (p.d**2).
eigen[j] / eigen.sum() is variable j's fraction of the total variance;
look at the first few eigen[] to see how many PCs get to 90 %, 95 % ...
p.npc: number of principal components,
e.g. 2 if the top 2 eigenvalues are >= `fraction` of the total.
It's ok to change this; methods use the current value.
Methods:
The methods of class PCA transform vectors or arrays of e.g.
20 variables, 2 principal components and 1000 observations,
using partial matrices U' d' Vt', parts of the full U d Vt:
A ~ U' . d' . Vt' where e.g.
U' is 1000 x 2
d' is diag([ d0, d1 ]), the 2 largest singular values
Vt' is 2 x 20. Dropping the primes,
d . Vt 2 principal vars = p.vars_pc( 20 vars )
U 1000 obs = p.pc_obs( 2 principal vars )
U . d . Vt 1000 obs, p.obs( 20 vars ) = pc_obs( vars_pc( vars ))
fast approximate A . vars, using the `npc` principal components
Ut 2 pcs = p.obs_pc( 1000 obs )
V . dinv 20 vars = p.pc_vars( 2 principal vars )
V . dinv . Ut 20 vars, p.vars( 1000 obs ) = pc_vars( obs_pc( obs )),
fast approximate Ainverse . obs: vars that give ~ those obs.
Notes:
PCA does not center or scale A; you usually want to first
A -= A.mean(A, axis=0)
A /= A.std(A, axis=0)
with the little class Center or the like, below.
See also:
http://en.wikipedia.org/wiki/Principal_component_analysis
http://en.wikipedia.org/wiki/Singular_value_decomposition
Press et al., Numerical Recipes (2 or 3 ed), SVD
PCA micro-tutorial
iris-pca .py .png
"""
from __future__ import division
import numpy as np
dot = np.dot
# import bz.numpyutil as nu
# dot = nu.pdot
__version__ = "2010-04-14 apr"
__author_email__ = "denis-bz-py at t-online dot de"
#...............................................................................
class PCA:
def __init__( self, A, fraction=0.90 ):
assert 0 <= fraction <= 1
# A = U . diag(d) . Vt, O( m n^2 ), lapack_lite --
self.U, self.d, self.Vt = np.linalg.svd( A, full_matrices=False )
assert np.all( self.d[:-1] >= self.d[1:] ) # sorted
self.eigen = self.d**2
self.sumvariance = np.cumsum(self.eigen)
self.sumvariance /= self.sumvariance[-1]
self.npc = np.searchsorted( self.sumvariance, fraction ) + 1
self.dinv = np.array([ 1/d if d > self.d[0] * 1e-6 else 0
for d in self.d ])
def pc( self ):
""" e.g. 1000 x 2 U[:, :npc] * d[:npc], to plot etc. """
n = self.npc
return self.U[:, :n] * self.d[:n]
# These 1-line methods may not be worth the bother;
# then use U d Vt directly --
def vars_pc( self, x ):
n = self.npc
return self.d[:n] * dot( self.Vt[:n], x.T ).T # 20 vars -> 2 principal
def pc_vars( self, p ):
n = self.npc
return dot( self.Vt[:n].T, (self.dinv[:n] * p).T ) .T # 2 PC -> 20 vars
def pc_obs( self, p ):
n = self.npc
return dot( self.U[:, :n], p.T ) # 2 principal -> 1000 obs
def obs_pc( self, obs ):
n = self.npc
return dot( self.U[:, :n].T, obs ) .T # 1000 obs -> 2 principal
def obs( self, x ):
return self.pc_obs( self.vars_pc(x) ) # 20 vars -> 2 principal -> 1000 obs
def vars( self, obs ):
return self.pc_vars( self.obs_pc(obs) ) # 1000 obs -> 2 principal -> 20 vars
class Center:
""" A -= A.mean() /= A.std(), inplace -- use A.copy() if need be
uncenter(x) == original A . x
"""
# mttiw
def __init__( self, A, axis=0, scale=True, verbose=1 ):
self.mean = A.mean(axis=axis)
if verbose:
print "Center -= A.mean:", self.mean
A -= self.mean
if scale:
std = A.std(axis=axis)
self.std = np.where( std, std, 1. )
if verbose:
print "Center /= A.std:", self.std
A /= self.std
else:
self.std = np.ones( A.shape[-1] )
self.A = A
def uncenter( self, x ):
return np.dot( self.A, x * self.std ) + np.dot( x, self.mean )
#...............................................................................
if __name__ == "__main__":
import sys
csv = "iris4.csv" # wikipedia Iris_flower_data_set
# 5.1,3.5,1.4,0.2 # ,Iris-setosa ...
N = 1000
K = 20
fraction = .90
seed = 1
exec "\n".join( sys.argv[1:] ) # N= ...
np.random.seed(seed)
np.set_printoptions( 1, threshold=100, suppress=True ) # .1f
try:
A = np.genfromtxt( csv, delimiter="," )
N, K = A.shape
except IOError:
A = np.random.normal( size=(N, K) ) # gen correlated ?
print "csv: %s N: %d K: %d fraction: %.2g" % (csv, N, K, fraction)
Center(A)
print "A:", A
print "PCA ..." ,
p = PCA( A, fraction=fraction )
print "npc:", p.npc
print "% variance:", p.sumvariance * 100
print "Vt[0], weights that give PC 0:", p.Vt[0]
print "A . Vt[0]:", dot( A, p.Vt[0] )
print "pc:", p.pc()
print "\nobs <-> pc <-> x: with fraction=1, diffs should be ~ 0"
x = np.ones(K)
# x = np.ones(( 3, K ))
print "x:", x
pc = p.vars_pc(x) # d' Vt' x
print "vars_pc(x):", pc
print "back to ~ x:", p.pc_vars(pc)
Ax = dot( A, x.T )
pcx = p.obs(x) # U' d' Vt' x
print "Ax:", Ax
print "A'x:", pcx
print "max |Ax - A'x|: %.2g" % np.linalg.norm( Ax - pcx, np.inf )
b = Ax # ~ back to original x, Ainv A x
back = p.vars(b)
print "~ back again:", back
print "max |back - x|: %.2g" % np.linalg.norm( back - x, np.inf )
# end pca.py
""" a small class for Principal Component Analysis
Usage:
p = PCA( A, fraction=0.90 )
In:
A: an array of e.g. 1000 observations x 20 variables, 1000 rows x 20 columns
fraction: use principal components that account for e.g.
90 % of the total variance
Out:
p.U, p.d, p.Vt: from numpy.linalg.svd, A = U . d . Vt
p.dinv: 1/d or 0, see NR
p.eigen: the eigenvalues of A*A, in decreasing order (p.d**2).
eigen[j] / eigen.sum() is variable j's fraction of the total variance;
look at the first few eigen[] to see how many PCs get to 90 %, 95 % ...
p.npc: number of principal components,
e.g. 2 if the top 2 eigenvalues are >= `fraction` of the total.
It's ok to change this; methods use the current value.
Methods:
The methods of class PCA transform vectors or arrays of e.g.
20 variables, 2 principal components and 1000 observations,
using partial matrices U' d' Vt', parts of the full U d Vt:
A ~ U' . d' . Vt' where e.g.
U' is 1000 x 2
d' is diag([ d0, d1 ]), the 2 largest singular values
Vt' is 2 x 20. Dropping the primes,
d . Vt 2 principal vars = p.vars_pc( 20 vars )
U 1000 obs = p.pc_obs( 2 principal vars )
U . d . Vt 1000 obs, p.obs( 20 vars ) = pc_obs( vars_pc( vars ))
fast approximate A . vars, using the `npc` principal components
Ut 2 pcs = p.obs_pc( 1000 obs )
V . dinv 20 vars = p.pc_vars( 2 principal vars )
V . dinv . Ut 20 vars, p.vars( 1000 obs ) = pc_vars( obs_pc( obs )),
fast approximate Ainverse . obs: vars that give ~ those obs.
Notes:
PCA does not center or scale A; you usually want to first
A -= A.mean(A, axis=0)
A /= A.std(A, axis=0)
with the little class Center or the like, below.
See also:
http://en.wikipedia.org/wiki/Principal_component_analysis
http://en.wikipedia.org/wiki/Singular_value_decomposition
Press et al., Numerical Recipes (2 or 3 ed), SVD
PCA micro-tutorial
iris-pca .py .png
"""
from __future__ import division
import numpy as np
dot = np.dot
# import bz.numpyutil as nu
# dot = nu.pdot
__version__ = "2010-04-14 apr"
__author_email__ = "denis-bz-py at t-online dot de"
#...............................................................................
class PCA:
def __init__( self, A, fraction=0.90 ):
assert 0 <= fraction <= 1
# A = U . diag(d) . Vt, O( m n^2 ), lapack_lite --
self.U, self.d, self.Vt = np.linalg.svd( A, full_matrices=False )
assert np.all( self.d[:-1] >= self.d[1:] ) # sorted
self.eigen = self.d**2
self.sumvariance = np.cumsum(self.eigen)
self.sumvariance /= self.sumvariance[-1]
self.npc = np.searchsorted( self.sumvariance, fraction ) + 1
self.dinv = np.array([ 1/d if d > self.d[0] * 1e-6 else 0
for d in self.d ])
def pc( self ):
""" e.g. 1000 x 2 U[:, :npc] * d[:npc], to plot etc. """
n = self.npc
return self.U[:, :n] * self.d[:n]
# These 1-line methods may not be worth the bother;
# then use U d Vt directly --
def vars_pc( self, x ):
n = self.npc
return self.d[:n] * dot( self.Vt[:n], x.T ).T # 20 vars -> 2 principal
def pc_vars( self, p ):
n = self.npc
return dot( self.Vt[:n].T, (self.dinv[:n] * p).T ) .T # 2 PC -> 20 vars
def pc_obs( self, p ):
n = self.npc
return dot( self.U[:, :n], p.T ) # 2 principal -> 1000 obs
def obs_pc( self, obs ):
n = self.npc
return dot( self.U[:, :n].T, obs ) .T # 1000 obs -> 2 principal
def obs( self, x ):
return self.pc_obs( self.vars_pc(x) ) # 20 vars -> 2 principal -> 1000 obs
def vars( self, obs ):
return self.pc_vars( self.obs_pc(obs) ) # 1000 obs -> 2 principal -> 20 vars
class Center:
""" A -= A.mean() /= A.std(), inplace -- use A.copy() if need be
uncenter(x) == original A . x
"""
# mttiw
def __init__( self, A, axis=0, scale=True, verbose=1 ):
self.mean = A.mean(axis=axis)
if verbose:
print "Center -= A.mean:", self.mean
A -= self.mean
if scale:
std = A.std(axis=axis)
self.std = np.where( std, std, 1. )
if verbose:
print "Center /= A.std:", self.std
A /= self.std
else:
self.std = np.ones( A.shape[-1] )
self.A = A
def uncenter( self, x ):
return np.dot( self.A, x * self.std ) + np.dot( x, self.mean )
#...............................................................................
if __name__ == "__main__":
import sys
csv = "iris4.csv" # wikipedia Iris_flower_data_set
# 5.1,3.5,1.4,0.2 # ,Iris-setosa ...
N = 1000
K = 20
fraction = .90
seed = 1
exec "\n".join( sys.argv[1:] ) # N= ...
np.random.seed(seed)
np.set_printoptions( 1, threshold=100, suppress=True ) # .1f
try:
A = np.genfromtxt( csv, delimiter="," )
N, K = A.shape
except IOError:
A = np.random.normal( size=(N, K) ) # gen correlated ?
print "csv: %s N: %d K: %d fraction: %.2g" % (csv, N, K, fraction)
Center(A)
print "A:", A
print "PCA ..." ,
p = PCA( A, fraction=fraction )
print "npc:", p.npc
print "% variance:", p.sumvariance * 100
print "Vt[0], weights that give PC 0:", p.Vt[0]
print "A . Vt[0]:", dot( A, p.Vt[0] )
print "pc:", p.pc()
print "\nobs <-> pc <-> x: with fraction=1, diffs should be ~ 0"
x = np.ones(K)
# x = np.ones(( 3, K ))
print "x:", x
pc = p.vars_pc(x) # d' Vt' x
print "vars_pc(x):", pc
print "back to ~ x:", p.pc_vars(pc)
Ax = dot( A, x.T )
pcx = p.obs(x) # U' d' Vt' x
print "Ax:", Ax
print "A'x:", pcx
print "max |Ax - A'x|: %.2g" % np.linalg.norm( Ax - pcx, np.inf )
b = Ax # ~ back to original x, Ainv A x
back = p.vars(b)
print "~ back again:", back
print "max |back - x|: %.2g" % np.linalg.norm( back - x, np.inf )
# end pca.py
Subscribe to:
Comments (Atom)