// UNICODE.H
//
// Copyright (c) 1997-1999 Symbian Ltd.  All rights reserved.
//

//
// The TUnicode class contains a Unicode value. It is provided for convenience in implementing the
// character attribute retrieval functions. It also contains:
// 	structures used to store and search the tables of character information:
// 		when modifying these, please remember that they form part of tables that must be initialised as aggregates,
// 		so they cannot have constructors, non-public members, base classes or virtual functions. I have used 'struct'
// 		rather than class to make that clear.
// 	a default constructor that sets the stored Unicode value to 0xFFFF - an invalid character
// 	constructors and conversion functions for converting between integers and TUnicode objects
// 	functions to retrieve the categories and attributes
// The categories are explained in 'unicode_fields.txt', which is a key to the fields of the data file
// 'unidata2.txt'; these files are supplied on the CD-ROM that comes with the book 'The Unicode Standard,
// Version 2.0'.
// Because the category constants must be available to users they are defined not here but in the TChar
// class in e32std.h.
//


#ifndef __UNICODE_H__
#define __UNICODE_H__ 1

#ifndef __E32STD_H__
#include <e32std.h>
#endif

// This material is used only in the Unicode build.
#ifdef _UNICODE

/*
A structure to contain the raw data about a Unicode character:
it must not have a constructor because an array of these in unitable.cpp is initialised as an aggregate.
Aggregate initialisation is positional, so the order and types of the members below must not be
changed without changing those initialisers as well.
*/
struct TUnicodeData
	{
	TUint8 iCategory;					// general category
	TUint8 iBDCategory;					// bidirectional category
	TUint8 iCombiningClass;				// combining class: number (currently) in the range 0..234
	TUint8 iFlags;						// flags: does this character have a lower case form, etc.
										// (the flag values are the anonymous enum declared in TUnicode)
	TInt16 iOffsets;					// two offsets packed into a single 16-bit value:
										// the low 6 bits are the digit offset: if this character
										// has a numeric value add this to the character code
										// then take the bottom 6 bits to get the numeric value;
										// the high 10 bits are the signed offset to lower or upper case:
										// add to convert to lower case, subtract to convert to upper case
	};

/*
A structure for a range of Unicode characters with the same raw data; must not have a
constructor because an array of these in unitable.cpp is initialised as an aggregate.
Only the start of each range is stored.
NOTE(review): a range presumably extends up to the start of the next entry in the table - confirm
against the lookup code in unicode.cpp.
*/
struct TUnicodeDataRange
	{
	TUint16 iRangeStart;	// Unicode value of the start of the range of characters
	TInt16 iIndex;			// index into an array of character information structures (-1 means data not available)
	};

/*
A structure to hold a set of character data: either the standard Unicode data or locale-specific
data that overrides it.
The structure only points at the two arrays; it has no constructor or destructor, so it does not
manage their storage.
*/
struct TUnicodeDataSet
	{
	const TUnicodeData *iData;			// array of character data structures
	const TUnicodeDataRange *iRange;	// array of ranges referring to elements of iData
										// (via TUnicodeDataRange::iIndex)
	TInt iRanges;						// number of elements in the array of ranges
	};

/*
A table of ranges of Unicode characters sharing an attribute stored separately
from the main attribute table. These are used for locale-specific
data like radical and stroke count for Chinese characters.
*/
struct TUnicodeAttributeRangeTable
	{
	// Look up the attribute value for the character aCode.
	// NOTE(review): the definition is not in this header; presumably the value is written to
	// aValue and the return value says whether one was found - confirm against the implementation.
	TBool GetAttribute(TUint16 aCode,TUint16& aValue) const;

	// One entry of the range table: the first character of a range and the value shared by the range.
	struct TRange
		{
		TUint16 iRangeStart;	// Unicode value of the start of the range of characters
		TUint16 iValue;			// the attribute value for this range
		};
	const TRange* iRange;		// the range table
	TInt iRanges;				// number of ranges in the table
	TUint16 iNullValue;			// value used to mean 'undefined' or 'unknown'
	};

/*
A table of arrays of attributes for certain ranges of Unicode characters. For example, attributes
only relevant to Chinese characters, like the KangXi dictionary position, could be stored
in two arrays: 0x4E00-0x9FFF (unified Han) and 0xF900-0xFA2D (compatibility ideographs).
*/
struct TUnicodeAttributeArrayTable
	{
	// Look up the attribute value for the character aCode.
	// NOTE(review): the definition is not in this header; presumably the value is written to
	// aValue and the return value says whether one was found - confirm against the implementation.
	TBool GetAttribute(TUint16 aCode,TUint16& aValue) const;

	// One array of values covering a run of consecutive character codes that starts at iBase.
	struct TArray
		{
		TUint16 iBase;				// Unicode value of the first character for which data is stored
		TUint16 iCount;				// number of values in the array
		const TUint16* iData;		// the data of the array
		};
	const TArray* iArray;			// the array table
	TInt iArrays;					// the number of arrays in the table
	TUint16 iNullValue;				// value used to mean 'undefined' or 'unknown'
	};

/*
An attribute repertoire is a table of locale-specific attributes,
each of which has a unique identifier.
Each attribute refers to its data through one of the two table types declared in this header:
TUnicodeAttributeRangeTable or TUnicodeAttributeArrayTable.
*/
struct TUnicodeAttributeRepertoire
	{
	// The kinds of table an attribute can refer to.
	enum TType
		{
		ERangeTable,
		EArrayTable
		};
	// A single attribute: its identifier, the kind of table holding its data, and a pointer to that table.
	struct TAttribute
		{
		TUint iId;			// the identifier of the attribute
		TType iType;		// NOTE(review): presumably selects which member of the union below is valid - confirm
		union
			{
			const TUnicodeAttributeRangeTable* iRangeTable;
			const TUnicodeAttributeArrayTable* iArrayTable;
			};
		};
	const TAttribute* iAttribute;	// the array of attributes
	TInt iAttributes;				// NOTE(review): presumably the number of elements in iAttribute - confirm
	};

/*
A class to hold a Unicode character and provide functions for characterisation (e.g., is this character lowercase?)
composition (e.g., create a character from a base character and an accent), and decomposition
(e.g., remove the accent from this character if there is one).
*/
class TUnicode
	{
	public:

	// Flags used in the TUnicodeData structure.
	enum
		{
		EHasLowerCase = 1,			// adding the case offset gives the lower case form
		EHasUpperCase = 2,			// subtracting the case offset gives the upper case form
		EHasTitleCase = 4,			// a title case form exists that is distinct from the upper case form
		ENumeric = 8,				// the character has a numeric value
		ELargeNumeric = 16,			// the numeric value is greater than 31
		EFraction = 32,				// the numeric value is fractional
		EMirrored = 64				// the character is mirrored
		};

	// Constructors
	TUnicode() { iCode = 0xFFFF; }
	TUnicode(TInt c) { iCode = (TUint16)(c <= 0xFFFF ? c : 0xFFFF); }
	operator TInt() const { return iCode; }

	// Attribute retrieval (functions used by the ExecHandler class, etc., in ekern.dll take IMPORT_C)
	void GetInfo(TChar::TCharInfo& aInfo,const TUnicodeDataSet *aOverridingDataSet) const;
	IMPORT_C TChar::TCategory GetCategory(const TUnicodeDataSet *aOverridingDataSet) const;
	TChar::TBDCategory GetBDCategory(const TUnicodeDataSet *aOverridingDataSet) const;
	TInt GetCombiningClass(const TUnicodeDataSet *aOverridingDataSet) const;
	IMPORT_C TUint16 GetLowerCase(const TUnicodeDataSet *aOverridingDataSet) const;
	IMPORT_C TUint16 GetUpperCase(const TUnicodeDataSet *aOverridingDataSet) const;
	TUint16 GetTitleCase(const TUnicodeDataSet *aOverridingDataSet) const;
	TBool IsMirrored(const TUnicodeDataSet *aOverridingDataSet) const;
	TInt16 GetNumericValue(const TUnicodeDataSet *aOverridingDataSet) const;
	TChar::TCJKWidth GetCJKWidth() const;
	static TBool Compose(TUint16& aResult,const TUint16 *aString,TInt aLength);
	const TUint16 *Decompose() const;
	IMPORT_C TUint16 Fold(TInt aFlags,const TUnicodeDataSet *aOverridingDataSet) const;
	
	// Retrieval of (usually locale-specific) attributes from an attribute repertoire.
	TBool GetAttribute(TUint32 aId,TUint16& aValue,const TUnicodeAttributeRepertoire* aRepertoire);

	// Utilities
	static TInt Compare(const TUint16 *aString1,TInt aLength1,const TUint16 *aString2,TInt aLength2);

	private:
	const TUnicodeData& GetData(const TUnicodeDataSet *aOverridingDataSet) const;
	const TUnicodeData *GetDataFromDataSet(const TUnicodeDataSet& aDataSet) const;
	TUint16 GetLowerCase(const TUnicodeData& aData) const;
	TUint16 GetUpperCase(const TUnicodeData& aData) const;
	TUint16 GetTitleCase(const TUnicodeData& aData) const;
	TInt16 GetNumericValue(const TUnicodeData& aData) const;

	TUint16 iCode;

	static const TUint8 CategoryTable[256];		// categories of characters in the range 0..255
	static const TUint8 UpperCaseTable[256];	// uppercase table for the range 0..255
	static const TUint8 LowerCaseTable[256];	// uppercase table for the range 0..255
	static const TUint8 FoldTable[256];			// fold table (strip accents, fold case) for the range 0..255
	static const TUint16 CJKWidthFoldTable[256];// width fold table (convert from width variants) for range 0xFF00..0xFFFF
	};

// Declarations for tables held in unitable.cpp and used by unicode.cpp.
// Only the declarations are visible here; the notes on individual tables are inferred from their
// names and types and should be confirmed against unitable.cpp.
extern const TUnicodeDataSet TheUnicodeDataSet;			// presumably the standard (non-locale-specific) character data
extern const TUint16 TheUnicodeCompositionBuffer[];		// presumably the character strings used for composition and decomposition
extern const TInt16 TheUnicodeComposeTable[];			// presumably the lookup table used by TUnicode::Compose
extern const TInt16 TheUnicodeDecomposeTable[];			// presumably the lookup table used by TUnicode::Decompose
extern const TInt TheUnicodeCompositions;				// presumably the number of compositions in the tables above

#endif // _UNICODE

#endif // __UNICODE_H__
