"""
flib.py
A collection of useful routines.

Date and time: 'str2tm', 'tm', 'tm2str', 'dt2str', 'dt2filename', 'labview2tm' and 'tm2labview'
Saving/loading data files: 'loadbinary', 'savebinary', 'loadascii', 'saveascii',
    'swapendian', 'savenum', 'loadnum', 'savestr', 'loadstr'
String formatting: 'a2str'
Array Handling: 'average', 'absdiff', 'finites'
Complex numbers: 'phase'
Number comparison: 'floatcmp'

Plotting: 'plotdate'

7 Jul 2009, Lev Levitin
"""
# Incomplete Changelog
#
# ...
# 28 May 2008
# Add 'poly2str' function
# Add 'pack' option to 'saveascii' function
#
# 17 Dec 2008
# Add abbreviation option to 'a2str'
# Add 'finites' function
# 
# 1 Feb 2009
# move 'plotdate' to 'deco.py', remove a reference to 'pylab', this speeds up loading of 'flib'
#
# 7 Feb 2009
# move 'poly2str' to 'deco.py'
#
# 17 Jun 2009
# add 'ireplace' case insensitive replace function
#
# 7 Jul 2009
# fully implement "ver00002" binary I/O
#
# 10 Jul 2009
# drop old interface to savebinary, modify the callers.
#
# 13 Sep 2009
# improve the performance of 'average'
#
# 25 Jul 2010
# set 'keep2D=True' by default in 'loadascii', previously was False
#
# 24 Nov 2010
# add 'c' function that is a shortcut to numpy.concatenate

import datetime
import fnmatch
import gc
import math
import os
import os.path
import sys
import time

import dateutil
import dateutil.parser  # 'import dateutil' alone does not load the parser submodule
import numpy

#import pylab

def str2tm(string, parserinfo=None, **kwargs):
    """
    Convert a date/time string into seconds since the UNIX Epoch.

    Parsing is delegated to the intelligent dateutil.parser.parse() routine.
    Unless the caller overrides it, 'dayfirst=True' is passed so ambiguous
    dates like '01/02/2009' are read as 1 Feb 2009.

    'parserinfo' and any keyword arguments are forwarded to dateutil.
    Returns a float timestamp interpreted in the local timezone
    (via time.mktime).
    """
    # Make sure the parser submodule is actually loaded; a bare
    # 'import dateutil' does not import 'dateutil.parser'.
    import dateutil.parser

    # dict.has_key() is deprecated and removed in Python 3; use 'in'.
    if 'dayfirst' not in kwargs:
        kwargs['dayfirst'] = True

    return time.mktime(dateutil.parser.parse(string, parserinfo, **kwargs).timetuple())

def tm(t):
    """
    Convert a timestamp from a range of formats into seconds from UNIX Epoch.
    Handle an array of dates.

    Call flib.str2tm if the argument is a string, return the raw value if the
    argument is a scalar number; None is passed through unchanged.
    If an array is supplied, every element is handled as above, and a
    numpy.array of numbers is returned.
    """
    if t is None:
        return t
    # 'unicode' only exists on Python 2; on Python 3 the original
    # isinstance tuple raised NameError for every scalar argument.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    if not numpy.isscalar(t):
        return numpy.array([tm(e) for e in t])
    elif isinstance(t, string_types):
        return str2tm(t)
    else:
        return t

def tm2str(t):
    """
    Convert a timestamp or an array of timestamps in
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC)
    into 'dd mmm yyyy HH:MM:SS' human readable strings according to locale.
    """
    if numpy.isscalar(t):
        return time.strftime('%d %b %Y %H:%M:%S', time.localtime(t))
    # element-wise conversion for sequences
    return [tm2str(stamp) for stamp in t]

def gmt2str(t):
    """
    Convert a timestamp or an array of timestamps in
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC)
    into 'dd mmm yyyy HH:MM:SS UTC' human readable strings in the UTC timezone.
    """
    if numpy.isscalar(t):
        return time.strftime('%d %b %Y %H:%M:%S UTC', time.gmtime(t))
    # element-wise conversion for sequences
    return [gmt2str(stamp) for stamp in t]

def tmHMS2str(t):
    """
    Convert a timestamp or an array of timestamps in
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC)
    into 'HH:MM:SS' human readable strings according to locale.

    Bug fix: the array branch used to call tm2str(), so sequences came back
    as full 'dd mmm yyyy HH:MM:SS' strings instead of 'HH:MM:SS'.
    """
    if not numpy.isscalar(t):
        # recurse with this function (not tm2str) so array elements get
        # the same HH:MM:SS formatting as scalars
        return [tmHMS2str(e) for e in t]
    else:
        return time.strftime('%H:%M:%S', time.localtime(t))

def dt2str(t):
    """
    Convert the date part of a timestamp or an array of timestamps in
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC)
    into 'dd mmm yyyy' human readable string(s) according to locale.
    """
    if numpy.isscalar(t):
        return time.strftime('%d %b %Y', time.localtime(t))
    # element-wise conversion for sequences
    return [dt2str(stamp) for stamp in t]

def dt2filename(t):
    """
    Convert the date part of a timestamp or an array of timestamps in
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC)
    into 'yyyymmdd' string(s) as used for log file names.
    """
    if numpy.isscalar(t):
        return time.strftime('%Y%m%d', time.localtime(t))
    # element-wise conversion for sequences
    return [dt2filename(stamp) for stamp in t]
def timespan2str(start, end):
    """
    Convert a pair of times in any format understood by 'flib.tm()' into a
    string of the form '<start time>-<end time>', where the date is omitted
    from the <end> time if both <start> and <end> fall on the same date.
    """
    same_day = dt2str(start) == dt2str(end)
    end_repr = tmHMS2str(end) if same_day else tm2str(end)
    return "%s-%s" % (tm2str(start), end_repr)

def labview2tm(t):
    """
    Convert a timestamp, or an array of timestamps, from
    Labview format (seconds since 00:00 1 Jan 1904 UTC) to
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC).
    """
    # 2082844800 s separate 1 Jan 1904 from 1 Jan 1970
    value = t if numpy.isscalar(t) else numpy.array(t)
    return value - 2082844800

def tm2labview(t):
    """
    Convert a timestamp, or an array of timestamps, from
    C and UNIX format (seconds since 00:00 1 Jan 1970 UTC)
    to Labview format (seconds since 00:00 1 Jan 1904 UTC).
    """
    # 2082844800 s separate 1 Jan 1904 from 1 Jan 1970
    value = t if numpy.isscalar(t) else numpy.array(t)
    return value + 2082844800

def datespan(start, end):
    """
    Return a string representation of the date span between two moments in
    time, given in any format understood by 'flib.tm()'.
    """
    first = dt2str(tm(start))
    last = dt2str(tm(end))
    if first == last:
        return first
    return first + "-" + last

def swapendian():
    """
    Tell whether binary data read from the big endian legacy file formats
    must be byte-swapped on this machine.

    The previous implementation always returned True, which is only correct
    on little endian hosts; derive the answer from the actual native byte
    order instead.
    """
    return sys.byteorder == 'little'

# settings for binary data I/O
# A file beginning with __bin_magic__ is a versioned flib binary file;
# anything else is treated as a bare dump of float64 values (version 0).
__bin_magic__ = '\x7f\xf8\xf8\x7f\x7f\xf8\xf8\x7f' # __bin_magic__ - NaN in both float32 and float64 with either endians.
__bin_version_len__ = 8 # version string length ('verNNNNN')
__bin_size_type__ = numpy.uint64 # data type for storing lengths (big endian on disk)
__bin_dtype_len__ = 8 # data type string length (used by the version 1 format)

def savenum(file, n, dtype=numpy.uint64):
    """
    Save a number 'n' to a binary file pointed to by handle 'file'.
    The optional 'dtype' argument specifies the format the number is stored
    in, defaulting to an unsigned 64 bit integer. Big endian byte order is
    always used on disk.
    """
    value = dtype(n)
    # numpy scalars expose their byte order in dtype.str; '<' means the
    # in-memory representation is little endian and must be swapped.
    if value.dtype.str[0] == '<':
        value = value.byteswap()
    value.tofile(file)
def loadnum(file, dtype=numpy.uint64):
    """
    Load a single number from a binary file pointed to by handle 'file'.
    The optional 'dtype' argument specifies the on-disk format, defaulting
    to an unsigned 64 bit integer. Big endian byte order is assumed.
    """
    raw = numpy.fromfile(file, dtype=dtype, count=1)
    # the file stores big endian values; swap when this platform would
    # read them as little endian
    if raw.dtype.str[0] == '<':
        raw = raw.byteswap()
    return raw[0]

def savestr(file, string):
    """
    Save a string into a binary file, preceded by its length encoded as a
    big endian 64 bit unsigned integer.
    """
    # length prefix first, then the raw contents
    savenum(file, len(string), dtype=__bin_size_type__)
    file.write(string)

def loadstr(file):
    """
    Load a string from a binary file: first read the length as a big endian
    64 bit unsigned integer, then read that many bytes of content.
    """
    length = loadnum(file, dtype=__bin_size_type__)
    return file.read(length)

def savebinary(filename, data):
    """
    Save numeric data into a binary file ('ver00002' format).

    Argument formats:
        'data' - dictionary of sections to be stored.
        keys should be strings
        values should be numpy 1D arrays or strings.

    For backward compatibility a bare array may be passed instead of a
    dictionary; it is then stored as section 'channel0' with an empty
    'header' section.
    """
    file = open(filename, 'wb')
    try:
        file.write(__bin_magic__)
        file.write('ver00002')

        if not isinstance(data, dict):
            # parse legacy argument: a bare array instead of a dict of
            # sections. (The previous code referenced an undefined
            # 'header' variable here and raised NameError.)
            data = {'header': '', 'channel0': numpy.asarray(data)}

        savenum(file, len(data), dtype=__bin_size_type__)
        for name in data.keys():
            savestr(file, name)
            section = data[name]
            if isinstance(section, str):
                # strings are tagged with the pseudo-format "string"
                savestr(file, "string")
                savestr(file, section)
            else:
                # arrays are tagged with their numpy dtype string and
                # stored as a byte count followed by raw values
                section = numpy.asarray(section)
                savestr(file, section.dtype.str)
                savenum(file, section.nbytes)
                section.tofile(file)
    finally:
        file.close()

def loadbinary(filename, full=False):
    """
    Load a numeric dataset from a binary file. Accepts a bare 64 bit floating
    point array or a file with a __bin_magic__ number (floating point NaN) and a header.
    If 'full' optional argument is set to True, a tuple is returned containing the
    file format version, header and data array. Version is 0 and header is an
    empty string, if a bare binary datafile was encountered.
    """
    file = open(filename, 'rb')
    try:
        # Peek at the first bytes: the magic NaN marker identifies the
        # versioned formats, anything else is a bare float64 dump.
        s = file.read(len(__bin_magic__))
        if s != __bin_magic__:
            # very old format, no markup at all, just a chunk of IEEE float 64 data
            # (the bytes already consumed are recovered via frombuffer)
            data = numpy.concatenate([
                numpy.frombuffer(s, dtype=numpy.float64, count=1),
                numpy.fromfile(file, dtype=numpy.float64)])
            if swapendian():
                data.byteswap(True)  # in-place swap to native byte order
            version = 0
            header = ''
            data = {'header': header, 'channel0': data}
        else:
            s = file.read(__bin_version_len__)
            if s == 'ver00001':
                # format version 1: one header string plus one data array
                version = 1
                
                # header: length prefix followed by raw text
                l = loadnum(file, dtype=__bin_size_type__)
                header = file.read(l)
                
                # data: element count, NUL-padded dtype tag, raw values
                l = loadnum(file, dtype=__bin_size_type__)
                type = file.read(__bin_dtype_len__).split('\0')[0]
                data = numpy.fromfile(file, dtype=numpy.dtype(type), count=l)
                if swapendian():
                    data.byteswap(True)
                data = {'header': header, 'channel0': data}
            elif s == 'ver00002':
                # format version 2: a dictionary of named sections,
                # each parsed by loadbinarysection()
                version = 2
                nsections = loadnum(file, dtype=__bin_size_type__)
                data = {}
                for n in range(nsections):
                    name = loadstr(file)
                    data[name] = loadbinarysection(file)
                header = data['header'] if 'header' in data.keys() else ''
            else:
                raise NotImplementedError("'%s' format is not implemented" % s)
        
        if full:
            return version, header, data
        else:
            return data['channel0'] 
    finally:
        file.close()

def loadbinarysection(file, skip=False):
    """
    Load one section of a 'ver00002' binary file from the current position
    of handle 'file'.

    A section consists of a format tag (a string), a byte length and the
    payload. If 'skip' is True the payload is seeked over and None is
    returned. A "string" tag yields the raw contents; any other tag is
    interpreted as a numpy dtype string and yields a numpy array.
    Raises ValueError on an unrecognised format tag.
    """
    format = loadstr(file)
    length = loadnum(file, dtype=__bin_size_type__)
    if skip:
        # advance past the payload without reading it
        file.seek(length, 1)
        return None
    else:
        if format.lower() == 'string':
            return file.read(length)
        else:
            try:
                dtype = numpy.dtype(format)
            except TypeError:
                raise ValueError("unsupported data format '%s' in binary file" % format)
            # floor division keeps the element count exact for large
            # sections (true division would round through a float)
            return numpy.fromfile(file, dtype=dtype, count=numpy.uint64(length // dtype.itemsize))

# old binary I/O - version 0
##
##def loadbinary(filename):
##    """
##    Load an array of floats from a given file stored in 64bit little endian format
##    """
##    data = numpy.fromfile(filename, dtype=numpy.float64)
##    if swapendian():
##        data.byteswap(True)
##    return data
##
##def savebinary(filename, data):
##    """
##    Save an array of floats into a given file in 64bit little endian format
##    """
##    data = numpy.array(data, dtype=numpy.float64)
##    if swapendian():
##        data = data.byteswap()
##    data.tofile(filename)

# old binary I/O - version 1
##def savebinary(filename, data, header='', dtype=numpy.float32):
##    """
##    Save a numeric dataset into a binary file. Accepts a 1D array 'data' and
##    an optional string 'header'. Optional 'dtype' argument specifies a format
##    for a number to be loaded from, defaults to 32 bit floating point.
##    """
##    file = open(filename, 'wb')
##    try:
##        file.write(__bin_magic__)
##        file.write('ver00001')
##
##        header=str(header)        
##        savenum(file, len(header), dtype=__bin_size_type__)
##        file.write(header)
##        
##        data = numpy.asarray(data, dtype=dtype).flatten()
##        if swapendian():
##            data = data.byteswap()
##        savenum(file, data.size, dtype=__bin_size_type__)
##
##        type = numpy.dtype(dtype).str
##        if len(type) > __bin_dtype_len__: raise UnsupportedError("Saving in format '%s' is not supported" % type)
##        file.write(type + '\0' * (__bin_dtype_len__ - len(type)))
##
##        data.tofile(file)
##    finally:
##        file.close()
##
##def loadbinary(filename, full=False, dtype=None):
##    """
##    Load a numeric dataset into a binary file. Accepts a bare 64 bit floating
##    point array or a file with a __bin_magic__ number (floating point NaN) and a header.
##    If 'full' optional argument is set to True, a tuple is retured containing the
##    file format version, header and data array. Version is 0 and header is an
##    empty string, if a bare binary datafile was encountered.
##    
##    If an optional 'dtype' argument is specified, the dataset is converted to
##    the specified format. It does not affect the loading of binary data from
##    the file.
##    """
##    file = open(filename, 'rb')
##    try:
##        s = file.read(len(__bin_magic__))
##        if s != __bin_magic__:
##            # very old format, no markup at all, just a chunk of IEEE float 64 data
##            data = numpy.concatenate([
##                numpy.frombuffer(s, dtype=numpy.float64, count=1),
##                numpy.fromfile(file, dtype=numpy.float64)])
##            if swapendian():
##                data.byteswap(True)
##            version = 0
##            header = ''
##        else:
##            s = file.read(__bin_version_len__)
##            if s == 'ver00001':
##                # format version 1
##                version = 1
##                
##                l = loadnum(file, dtype=__bin_size_type__)
##                header = file.read(l)
##                
##                l = loadnum(file, dtype=__bin_size_type__)
##                type = file.read(__bin_dtype_len__).split('\0')[0]
##                data = numpy.fromfile(file, dtype=numpy.dtype(type), count=l)
##                if swapendian():
##                    data.byteswap(True)
##            elif s == 'var00002':
##                # format version 2
##                version = 2
##                
##                l = loadnum(file, dtype=__bin_size_type__)
##                header = file.read(l)
##                
##            else:
##                raise NotImplementedError("'%s' format is not implemented" % s)
##        
##        if dtype is not None:
##            data = data.astype(dtype)
##        if full:
##            return version, header, data
##        else:
##            return data
##    finally:
##        file.close()

def n2str(number, fmt='%g'):
    """
    Convert a number into a string using format 'fmt'.
    Special values are rendered as:
        "NaN" for not-a-number
        "Inf" for positive infinity
        "-Inf" for negative infinity
    """
    if not numpy.isfinite(number):
        if numpy.isinf(number):
            return 'Inf' if number > 0 else '-Inf'
        return 'NaN'
    return fmt % number
    
def smartfloat(arg):
    """
    Converts into float from different formats:

    1. if arg is a string, treat as a decimal floating point number.
       Handle special cases:
            "NaN", "-1.#IND..." is Not-a-Number
            "1.#QNAN" is minus Not-a-Number (???)
            "Inf", "+Inf", "1.#INF...", "+1.#INF..." is Positive Infinity
            "-Inf","-1.#INF..." is Negative Infinity

    2. a real part is taken from a complex number
    3. float(arg) is called for anything else.
    """
    # 'unicode' only exists on Python 2; on Python 3 all strings are 'str'
    # and the original isinstance tuple raised NameError.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    if isinstance(arg, string_types):
        s = arg.strip().upper()
        # use numpy.nan/numpy.inf: the capitalised aliases numpy.NaN and
        # numpy.Inf were removed in numpy 2.0
        if s == 'NAN' or s[:7] == '-1.#IND':
            return numpy.nan
        if s[:7] == '1.#QNAN':
            return -numpy.nan
        if s == 'INF' or s == '+INF' or s[:6] == '1.#INF' or s[:7] == '+1.#INF':
            return numpy.inf
        if s == '-INF' or s[:7] == '-1.#INF':
            return -numpy.inf
    if isinstance(arg, complex):
        return float(arg.real)
    return float(arg)

def loadascii(filename, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, keep2D=True):
    """
    A CUSTOMISED VERSION OF pylab.load

    CHANGES FROM PYLAB:
    if keep2D is set to True (default):
        1. 2D arrays are preserved even if an empty/single line/single column is read.
        2a. if an empty file was read and "usecols" is specified, the shape is set to 0,len(usecols)
        2b. if an empty file was read and "usecols" is not specified, the shape is set to 0,0
    if keep2D is set to False:
        1. A single line/single column data is converted into a 1D array (standard pylab behaviour)
        2. An empty file is converted into an empty array, does not generate an error (pylab generates an error)

    Load ASCII data from filename into an array and return the array.

    The data must be regular, same number of values in every row.

    filename can be a filename or a file handle.  Support for gzipped files is
    automatic, if the filename ends in .gz

    matfile data is not supported; use scipy.io.mio module

    Example usage:

      X = load('test.dat')  # data in two columns
      t = X[:,0]
      y = X[:,1]

    Alternatively, you can do the same with "unpack"; see below

    comments - the character used to indicate the start of a comment
    in the file

    delimiter is a string-like character used to separate values in the
    file. If delimiter is unspecified or none, any whitespace string is
    a separator.

    converters, if not None, is a dictionary mapping column number to
    a function that will convert that column to a float.  Eg, if
    column 0 is a date string: converters={0:datestr2num}

    skiprows is the number of rows from the top to skip

    usecols, if not None, is a sequence of integer column indexes to
    extract where 0 is the first column, eg usecols=(1,4,5) to extract
    just the 2nd, 5th and 6th columns

    unpack, if True, will transpose the matrix allowing you to unpack
    into named arguments on the left hand side

        t,y = load('test.dat', unpack=True) # for  two column data
        x,y,z = load('somefile.dat', usecols=(3,5,7), unpack=True)

    See examples/load_demo.py which exercises many of these options.
    """
    from matplotlib import cbook

    if converters is None: converters = {}
    fh = cbook.to_filehandle(filename)
    try:
        X = []

        if delimiter==' ':
            # space splitting is a special case since x.split() is what
            # you want, not x.split(' ')
            def splitfunc(x):
                return x.split()
        else:
            def splitfunc(x):
                return x.split(delimiter)

        converterseq = None
        for i,line in enumerate(fh):
            if i<skiprows: continue
            # strip trailing comments and surrounding whitespace
            line = line.split(comments, 1)[0].strip()
            if not len(line): continue
            if converterseq is None:
                # pick a converter per column from the first data line;
                # smartfloat handles NaN/Inf spellings
                converterseq = [converters.get(j,smartfloat) for j,val in enumerate(splitfunc(line))]
            if usecols is not None:
                vals = line.split(delimiter)
                row = [converterseq[j](vals[j]) for j in usecols]
            else:
                row = [converterseq[j](val) for j,val in enumerate(splitfunc(line))]
            X.append(row)

        X = numpy.array(X, numpy.float64)

        # give an empty result a well-defined 2D shape
        if len(X) == 0:
            if usecols is None:
                X.shape = 0,0
            else:
                X.shape = 0,len(usecols)

        if not keep2D:
            # pylab standard behaviour - flatten a 2D array if one line/one column
            r,c = X.shape
            if r==1 or c==1:
                X.shape = max(r,c),
            elif r==0 or c==0:
                X.shape = 0,

        if unpack: return X.transpose()
        else: return X
    finally:
        if fh is not filename:
            fh.close()

def loadasciidir(dirname, mask=None, antimask=None, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, quiet=False):
    """
    Concatenate data from ascii spreadsheets found in 'dirname'.
    Wildcards 'mask' and 'antimask' can be specified to select/reject
    files by name.
    'comments', 'delimiter', 'converters', 'skiprows', 'usecols' and 'unpack'
    are passed to 'flib.loadascii' when loading individual files.
    
    Boolean 'quiet' argument specifies how to handle files that can not
    be read by 'flib.loadascii' or a situation when files have different
    number of columns. If 'quiet' is False exceptions are raised.
    Otherwise the unreadable files are ignored and the spreadsheet
    is narrowed to the smallest number of columns among the files concatenated.
    """
    X = None
    
    for filename in os.listdir(dirname):
        path = os.path.join(dirname, filename)
        # consider plain files only, filtered by the include/exclude masks
        if not os.path.isfile(path):
            continue
        if mask is not None and not fnmatch.fnmatchcase(filename, mask):
            continue
        if antimask is not None and fnmatch.fnmatchcase(filename, antimask):
            continue
        if quiet:
            try:
                Y = loadascii(path, comments, delimiter, converters, skiprows, usecols, keep2D=True)
            except Exception:
                # best-effort mode: silently skip unreadable files
                continue
        else:
            Y = loadascii(path, comments, delimiter, converters, skiprows, usecols, keep2D=True)
        if X is None:
            X = Y
        else:
            if X.shape[1] != Y.shape[1]:
                if quiet:
                    # narrow both to the common set of leading columns
                    l = min(X.shape[1], Y.shape[1])
                    X = X[:,:l]
                    Y = Y[:,:l]
                else:
                    raise ValueError("'%s': number of columns does not match other files being merged" % path)
            X = numpy.vstack([X, Y])

    if X is None:
        # NOTE(review): this fallback has shape (len(usecols), 0) while
        # loadascii returns (0, len(usecols)) for an empty file — confirm
        # which orientation callers expect before changing.
        X = numpy.zeros([len(usecols) if usecols is not None else 0, 0])
    if unpack:
        X = X.transpose()
    gc.collect()
    return X

def saveascii(filename, X, fmt='%.8g', delimiter='\t', header=None, append=False, pack=None):
    """
    A CUSTOMISED VERSION OF pylab.save
    CHANGES:
        1. Allows a header to be added to the file.
        2. Allows appending data to an existing file. It is entirely the
           responsibility of the user to make sure the data appended is in
           the same format as the initial part of the file.
           The header is not written when appending to an existing file.
        3. Default number format is '%.8g'
        4. Default delimiter is a tab character '\t'
        5. If 'pack' is True, 'X' is transposed before saving (compatible with
           'flib.loadascii' and 'pylab.load' with 'unpack=True').
           If 'pack' is None (default), 'X' is transposed if it is a tuple,
           and preserved otherwise.
           If 'pack' is False, no transposition is carried out (the default
           pylab behaviour).

    Save the data in X to file filename using fmt string to convert the
    data to strings

    filename can be a filename or a file handle.  If the filename ends in .gz,
    the file is automatically saved in compressed gzip format.  The load()
    command understands gzipped files transparently.

    Example usage:

    save('test.out', X)         # X is an array
    save('test1.out', (x,y,z))  # x,y,z equal sized 1D arrays
    save('test2.out', x)        # x is 1D
    save('test3.out', x, fmt='%1.4e')  # use exponential notation

    delimiter is used to separate the fields, eg delimiter ',' for
    comma-separated values
    """
    from matplotlib import cbook

    if cbook.is_string_like(filename):
        mode = 'a' if append else 'w'

        already_exists = os.path.exists(filename)
        if filename.endswith('.gz'):
            import gzip
            fh = gzip.open(filename, mode + 'b')
        else:
            # open() instead of the Python-2-only 'file' builtin
            fh = open(filename, mode)
    elif hasattr(filename, 'seek'):
        fh = filename
        append = True
        already_exists = True
    else:
        raise ValueError('filename must be a string or file handle')

    try:
        # write the header only when creating a fresh file
        if (header is not None) and not (append and already_exists):
            fh.write(header + '\n')

        isXtuple = type(X) is tuple
        X = numpy.asarray(X)

        # promote a 1D array to a single-column 2D array for the writing
        # loop; the original shape is restored afterwards
        origShape = None
        if X.ndim == 1:
            origShape = X.shape
            X.shape = len(X), 1

        if pack is True or (pack is None and isXtuple):
            X = X.transpose()

        for row in X:
            fh.write(delimiter.join([n2str(val, fmt) for val in row]) + '\n')

        # undo the transposition and reshaping so the caller's array is
        # left exactly as it was passed in
        if pack is True or (pack is None and isXtuple):
            X = X.transpose()

        if origShape is not None:
            X.shape = origShape
    finally:
        if fh is not filename:
            fh.close()

def readheader(filename, comment='#', skiprows=0):
    """
    Return the comment header of an ASCII data file.

    After skipping the first 'skiprows' lines, consecutive lines starting
    with the 'comment' character are collected; reading stops at the first
    non-comment line. The header is returned with trailing whitespace
    stripped.

    'filename' can be a path or an open file handle (gzip files are
    handled transparently via matplotlib's cbook.to_filehandle).
    """
    from matplotlib import cbook

    fh = cbook.to_filehandle(filename)
    header = ''
    try:
        # (removed an unused 'X = []' and a stray no-op 'header'
        # expression statement from the original implementation)
        for i, line in enumerate(fh):
            if i < skiprows: continue
            if line.startswith(comment):
                header += line
            else:
                break
    finally:
        if fh is not filename:
            fh.close()
    return header.rstrip()

def reduce_similar(x, y, xstep = 0, ystep = 0):
    """
    Reduce size of an array by combining similar points into their centre of mass.
    'x' and 'y' are expected to be 1D arrays of the same length, containing the data.

    If both 'xstep' and 'ystep' are 0, the function deletes multiple occurrences of
    the same points.
    
    If 'xstep' is positive, points with similar 'y' and scatter in 'x' smaller than 'xstep'
    are combined into a single point with an average 'x'. For a negative 'xstep' the
    absolute value is treated as an xstep, relative to the total span of values in 'x'.

    If 'ystep' is non-zero, the behaviour is similar to the above, but with the roles
    of 'x' and 'y' swapped.
    
    Both 'xstep' and 'ystep' currently can not be non-zero.
    
    """
    if (xstep != 0) and (ystep != 0):
        raise ValueError('Only one of xstep and ystep can be non-zero')
    
    if len(x) != len(y):
        raise ValueError('x and y must be the same length')
        
    if len(x) < 2:
        # nothing to combine
        return x,y
        
    # work out the absolute tolerance; a negative step is relative to the
    # full span of the corresponding coordinate
    if xstep < 0:
        step = abs(xstep)*(max(x) - min(x))
    elif xstep > 0:
        step = xstep
    elif ystep < 0:
        step = abs(ystep)*(max(y) - min(y))
    elif ystep > 0:
        step = ystep
    else:
        step = 0

    # order the pair so the exactly-matched coordinate comes first;
    # for xstep the roles of x and y are swapped (and swapped back at
    # the end)
    if xstep != 0:
        xy = numpy.array([y,x])
    else:
        xy = numpy.array([x,y])

    xy = xy.transpose().tolist()
    # at this point xy is an array of [x[i], y[i]] pairs, sort works alphabetically
    xy.sort()

    xr = [xy[0][0]]
    yr = [xy[0][1]]
    count = 1
    
    # choose the comparison/merging strategy once, outside the loop
    if step == 0:
        def are_similar(x1,y1, x2,y2, step):
            # exact duplicates only
            return (x1 == x2) and (y1 == y2)
        def average_array(array, newval, count):
            # duplicates are simply dropped, nothing to average
            pass
    else:
        def are_similar(x1,y1, x2,y2, step):
            return (x1 == x2) and abs(y2 - y1) <= step
        def average_array(array, newval, count):
            # running mean: fold the new value into the last stored point
            array[-1] = (count * array[-1] + newval) / (count+1)

    for i in range(1,len(xy)):
        xi = xy[i][0]
        yi = xy[i][1]

        if are_similar(xi, yi, xr[-1], yr[-1], step):
            average_array(xr, xi, count)
            average_array(yr, yi, count)
            count += 1
        else:
            xr.append(xi)
            yr.append(yi)
            count = 1

    # undo the coordinate swap for the xstep case
    if xstep != 0:
        return yr, xr
    else:
        return xr, yr

def a2str(array, sep=', ', maxlen=None, gap='...'):
    """
    Return a separator-joined list of str() representations of the elements
    of 'array' - like str(array) but without the surrounding brackets.

    'array' - the sequence to represent
    'sep' - separator between elements
    'maxlen' - maximum number of elements shown at the head and the tail.
    If the array length exceeds 2*maxlen+1, only the first and last 'maxlen'
    elements are represented, with 'gap' filling the middle.
    """
    if maxlen is not None and len(array) > 2 * maxlen + 1:
        shown = list(array[:maxlen]) + [gap] + list(array[-maxlen:])
    else:
        shown = array
    return sep.join(str(item) for item in shown)

def finites(*args):
    """
    Return a subset of an array containing only the finite elements.
    Accepts either a single array argument or an arbitrary number of
    scalar arguments, which are combined into an array.
    """
    if len(args) == 1 and not numpy.isscalar(args[0]):
        values = numpy.asarray(args[0])
    else:
        values = numpy.asarray(args)
    return values[numpy.isfinite(values)]

def smbpath(path):
    """
    Convert an SMB (Windows-style) filename to what the local operating
    system will understand. On Linux, backslashes become forward slashes
    and UNC '//server/...' paths are mapped under '/mnt'; on any other
    platform the path is returned unchanged.
    """
    if sys.platform[:5].lower() != 'linux':
        return path
    converted = path.replace('\\', '/')
    if converted[:2] == '//':
        return os.path.join('/mnt', converted[2:])
    return converted

def absdiff(array, max=True):
    """
    Return an array of absolute values of differences between nearest
    neighbours in the array. For every element the maximum (if max is True)
    or minimum (otherwise) of the absolute differences between its value and
    its neighbours' values is stored in the returned array.
    """
    step = array[1:] - array[:-1]
    left = numpy.concatenate(([0.0], step))    # difference with previous neighbour
    right = numpy.concatenate((-step, [0.0]))  # difference with next neighbour
    combine = numpy.maximum if max else numpy.minimum
    return combine(abs(left), abs(right))

def average(array, Nwindow = 10, truncate=False, extend_last_bin=True):
    """
    Average 'Nwindow' long sections of 'array' to reduce size and scatter.

    'truncate' and 'extend_last_bin' control how a trailing partial window
    is handled; they are forwarded to the module-level 'slice' helper.
    """
    # NOTE(review): 'slice' here must be a module-level helper defined
    # elsewhere in this file (it shadows the builtin 'slice') — confirm it
    # exists; the builtin would reject this five-argument call.
    return slice(array, lambda a: a.mean(axis=1), Nwindow, truncate, extend_last_bin)

##    array = numpy.asarray(array)
##    shape = list(array.shape)
##    shape[0] = int(numpy.ceil(shape[0] * 1.0 / Nwindow))
##    data = numpy.zeros(shape, dtype=array.dtype)
##
##    if len(shape) > 1:
##        for N in range(shape[0]):
##            data[N,:] = numpy.mean(array[N*Nwindow:(N+1)*Nwindow, :], axis=0)
##    else:
##        for N in range(shape[0]):
##            data[N] = numpy.mean(array[N*Nwindow:(N+1)*Nwindow], axis=0)
##    
##    return data

def slice_mean_and_std(array, Nwindow = 10, truncate=False, extend_last_bin=True):
    """
    Divide 'array' into bins 'Nwindow' elements wide and return a tuple of
    two arrays: the mean and the standard deviation over each bin.

    'truncate' and 'extend_last_bin' govern the treatment of the tail of
    the array when its size is not a multiple of 'Nwindow':

    If 'truncate' is True, the last up to 'Nwindow-1' elements are dropped.
    Otherwise they form a separate bin if 'extend_last_bin' is False, or are
    merged into the preceding 'Nwindow' elements if 'extend_last_bin' is True.
    """
    means = slice(array, lambda a: a.mean(axis=1), Nwindow, truncate, extend_last_bin)
    stds = slice(array, lambda a: a.std(axis=1), Nwindow, truncate, extend_last_bin)
    return means, stds
##    array = numpy.asarray(array)
##    d = array.copy()
##    d.resize([int(len(d) / Nwindow), Nwindow])
##    
##    mean = d.mean(axis=1)
##    std = d.std(axis=1)
##    
##    if not truncate and d.size > int(len(d) / Nwindow) * Nwindow:
##        if extend_last_bin and mean.size() > 0:
##            d = array.flatten()[Nwindow * (int(len(d) / Nwindow)-1):]
##            mean[-1] = numpy.mean(d)
##            std[-1] = numpy.std(d)
##        else:
##            d = array.flatten()[Nwindow * int(len(d) / Nwindow):]
##            mean = numpy.concatenate([mean, [numpy.mean(d)]])
##            std = numpy.concatenate([std, [numpy.std(d)]])
##    
##    return mean, std

##def slice(array, function, Nwindow = 10, truncate=False, extend_last_bin=True):
##    """
##    divide the original 'array' into bins 'Nwindow' elements wide
##    and do similar processing of the bins.
##    'function' is called on the reshaped array of MxNwindow and
##    bins are along the axis=1 (second dimension).
##    
##    'truncate' and 'extend_last_bin' govern the treatment of the very end of
##    the supplied array if its size is not a multiple of 'Nwindow':
##    
##    If 'truncate' is True, the last up to 'Nwindow-1' elements are truncated.
##    Otherwise they are treated as a separate bin if 'extend_last_bin' is False
##    or added to the preceeding 'Nwindow' elements if 'extend_last_bin' is True.
##    """
##    array = numpy.asarray(array).flatten()
##    d = array.copy()
##    N = int(len(d) / Nwindow)
##    d.resize([N, Nwindow])
##    r = function(d)
##    
##    if not truncate and array.size > Nwindow * N:
##        if extend_last_bin and r.size > 0:
##            d = array[Nwindow * (N-1):]
##            d = d.reshape([1, len(d)])
##            r[-1:] = function(d)
##        else:
##            d = array[Nwindow * N:]
##            d = d.reshape([1, len(d)])
##            r = numpy.concatenate([r, function(d)])
##    
##    return r

def slice(array, function, Nwindow = 10, truncate=False, extend_last_bin=True):
    """
    Divide 'array' into bins 'Nwindow' elements wide along the first axis
    and apply similar processing to each bin.
    'function' is called on the array reshaped to (Nbins, Nwindow, ...);
    the bins run along axis=1 (the second dimension).

    'truncate' and 'extend_last_bin' govern the treatment of the very end of
    the supplied array if its length is not a multiple of 'Nwindow':

    If 'truncate' is True, the last up to 'Nwindow-1' elements (rows) are truncated.
    Otherwise they are treated as a separate bin if 'extend_last_bin' is False
    or added to the preceeding 'Nwindow' elements if 'extend_last_bin' is True.

    NOTE: the name shadows the builtin 'slice'; kept for backward compatibility.
    """
    array = numpy.asarray(array)
    N = int(array.shape[0] / Nwindow)
    # Reshape the leading complete bins to (N, Nwindow, ...). Using a reshape
    # of a slice avoids copying the whole array just to truncate it.
    binned = array[:N * Nwindow].reshape((N, Nwindow) + array.shape[1:])
    r = function(binned)

    # BUGFIX: the leftover test must compare row counts, not 'array.size'
    # (total element count). For multi-dimensional input whose first axis is
    # an exact multiple of Nwindow, the old test wrongly detected a leftover
    # and, with extend_last_bin=False, appended a spurious empty (NaN) bin.
    if not truncate and array.shape[0] > Nwindow * N:
        if extend_last_bin and r.size > 0:
            # merge the leftover rows into the last complete bin
            d = array[Nwindow * (N - 1):]
            d = d.reshape((1, len(d)) + d.shape[1:])
            r[-1:] = function(d)
        else:
            # treat the leftover rows as an extra, shorter bin
            d = array[Nwindow * N:]
            d = d.reshape((1, len(d)) + d.shape[1:])
            r = numpy.concatenate([r, function(d)])

    return r

def phase(c):
    """
    Return the argument (phase in radians) of a complex number, or an array
    of arguments for an array of complex numbers.
    """
    if numpy.isscalar(c):
        return math.atan2(c.imag, c.real)
    return numpy.arctan2(numpy.imag(c), numpy.real(c))

class Period(object):
    def __init__(self, start, end, **vargs):
        """
        Construct a period of time.

        'start' and 'end' are the bounding moments (anything 'tm' accepts).
        Any further keyword arguments become attributes of the Period,
        unless an attribute of that name already exists.
        """
        self.start = tm(start)
        self.end = tm(end)

        for name, value in vargs.items():
            if name in self.__dict__:
                raise ValueError("Can not add attribute '%s' to a Period, because it is already there" % name)
            self.__dict__[name] = value

    def __eq__(self, other):
        """
        Comparison: True if 'other' is a moment of time that lies within
        this Period (inclusive on both ends).
        """
        if isinstance(other, Period):
            # let the Period operand play the role of the interval
            self, other = other, self
        try:
            t = tm(other)
        except Exception:
            return False
        return (self.start <= t) & (t <= self.end)

    def __repr__(self):
        return 'Period(%s-%s)' % (tm2str(self.start), tm2str(self.end))

def integrate(x, y):
    """
    Cumulative trapezoidal integral of y over x.

    Return an array
        i[k] = \int_{x[0]}^{x[k]} y dx
    with i[0] == 0.

    BUGFIX: the result is always floating point. Previously the accumulator
    was created with numpy.zeros_like(y), so an integer-valued 'y' silently
    truncated every partial sum to an integer. The Python loop is also
    replaced by a vectorized cumulative sum.
    """
    x = numpy.asarray(x, dtype=float)
    y = numpy.asarray(y, dtype=float)

    i = numpy.zeros_like(y)
    # trapezoid areas between consecutive samples, accumulated
    i[1:] = numpy.cumsum(0.5 * (y[1:] + y[:-1]) * numpy.diff(x))
    return i

def floatcmp(x, y):
    """
    Return True if x and y are similar numbers in the sense that 1 equals
    1.0 and NaN equals NaN; infinities compare equal regardless of sign.
    Return False otherwise.
    """
    if x == y:
        return True
    if numpy.isnan(x) and numpy.isnan(y):
        return True
    return numpy.isinf(x) and numpy.isinf(y)

# the following function comes from here: http://www.noogz.net/website/blog/programming/20080327-StringIRep.html
# the following function comes from here: http://www.noogz.net/website/blog/programming/20080327-StringIRep.html
def ireplace(self,old,new,count=0):
    '''Behaves like string.replace(), but does so in a case-insensitive fashion.'''
    import re
    pattern = re.compile(re.escape(old),re.I)
    # BUGFIX: use a callable replacement so that backslashes and group
    # references in 'new' (e.g. '\\1', '\\n') are inserted literally,
    # matching str.replace() semantics, instead of being interpreted
    # by re.sub as escape sequences.
    return pattern.sub(lambda m: new, self, count)

def load_scope_xy(filename, binary=True):
    """
    Load data from a two-channel scope trace, in either binary or ascii
    format. Return a tuple of two arrays, x and y.
    """
    if not binary:
        return loadascii(filename, usecols=(1,2), unpack=True)
    data = loadbinary(filename, full=True)
    header = data[2]
    return header['channel0'], header['channel1']

def read_timelog(filename, start=None, end=None):
    """
    Parse a file in a format 'date time event name'
    with '# ...' as comments.
    Return a list of 'Period' objects with start and end equal to the given
    date and time and the event name saved as 'name'.
    If 'start'/'end' are given, only events after start/before end are selected.
    """
    import re
    start = -numpy.inf if start is None else tm(start)
    end = numpy.inf if end is None else tm(end)
    events = []
    # BUGFIX: 'with' guarantees the handle is closed and avoids the old
    # NameError in 'finally: f.close()' when open() itself failed.
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            # The timestamp is everything up to the SECOND space-or-tab
            # separator ('date time' followed by the event name).
            # BUGFIX: the old find()-based search broke on lines separated
            # only by tabs (it fell back to line[:-1]).
            seps = [m.start() for m in re.finditer(r'[ \t]', line)]
            if len(seps) < 2:
                continue  # malformed line: no event name after 'date time'
            i_sep = seps[1]

            try:
                t = tm(line[:i_sep])
            except Exception:
                # best-effort, like the original: skip unparseable lines
                continue
            if start <= t <= end:
                events.append(Period(t, t, name=line[i_sep + 1:]))
    return events

def c(*args, **vargs):
    """
    Concatenate arrays.
    Unlike numpy.concatenate, all unnamed arguments are grouped together;
    keyword arguments (e.g. 'axis') are passed through.
    """
    return numpy.concatenate(list(args), **vargs)
