00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00020 #ifndef flc_string_H
00021 #define flc_string_H
00022
00023 #include <falcon/types.h>
00024 #include <falcon/garbageable.h>
00025 #include <falcon/gcalloc.h>
00026 #include <falcon/deepitem.h>
00027 #include <stdlib.h>
00028
00029 #define FALCON_STRING_ALLOCATION_BLOCK 32
00030
00031 namespace Falcon {
00032
00033 class Stream;
00034 class VMachine;
00035 class LiveModule;
00036
00053 class String;
00054
00075 namespace csh {
00076
/**
 * Identifier reported by a manipulator through Base::type().
 *
 * The enumerators keep their implicit ordinal values, spelled out here so the
 * numeric mapping is visible at a glance.
 */
enum t_type
{
   cs_static   = 0,
   cs_buffer   = 1,
   cs_static16 = 2,
   cs_buffer16 = 3,
   cs_static32 = 4,
   cs_buffer32 = 5
};
00090
00092 const uint32 npos = 0xFFFFFFFF;
00093
00097 class FALCON_DYN_CLASS Base
00098 {
00099 public:
00100 virtual ~Base() {}
00101 virtual t_type type() const =0;
00102 virtual uint32 charSize() const = 0;
00103 virtual uint32 length( const String *str ) const =0;
00104 virtual uint32 getCharAt( const String *str, uint32 pos ) const =0;
00105
00106 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const =0;
00107 virtual void subString( const String *str, int32 start, int32 end, String *target ) const =0;
00109 virtual uint32 find( const String *str, const String *element, uint32 start =0, uint32 end = npos) const = 0;
00110 virtual uint32 rfind( const String *str, const String *element, uint32 start =0, uint32 end = npos) const = 0;
00111 virtual void insert( String *str, uint32 pos, uint32 len, const String *source ) const =0;
00112 virtual bool change( String *str, uint32 start, uint32 end, const String *source ) const =0;
00113 virtual void remove( String *str, uint32 pos, uint32 len ) const =0;
00114 virtual String *clone( const String *str ) const =0;
00115 virtual void destroy( String *str ) const =0;
00116
00117 virtual void bufferize( String *str ) const =0;
00118 virtual void bufferize( String *str, const String *strOrig ) const =0;
00119 virtual void reserve( String *str, uint32 size, bool relative = false, bool block = false ) const = 0;
00120 virtual void shrink( String *str ) const = 0;
00121
00122 virtual const Base *bufferedManipulator() const =0;
00123 };
00124
00129 class FALCON_DYN_CLASS Byte: public Base
00130 {
00131 public:
00132 virtual ~ Byte() {}
00133 virtual uint32 length( const String *str ) const;
00134 virtual uint32 getCharAt( const String *str, uint32 pos ) const;
00135 virtual void subString( const String *str, int32 start, int32 end, String *target ) const;
00136 virtual bool change( String *str, uint32 pos, uint32 end, const String *source ) const;
00137 virtual String *clone( const String *str ) const;
00138 virtual uint32 find( const String *str, const String *element, uint32 start =0, uint32 end = 0) const;
00139 virtual uint32 rfind( const String *str, const String *element, uint32 start =0, uint32 end = 0) const;
00140 virtual void remove( String *str, uint32 pos, uint32 len ) const;
00141
00142 virtual void bufferize( String *str ) const;
00143 virtual void bufferize( String *str, const String *strOrig ) const;
00144
00145 virtual void reserve( String *str, uint32 size, bool relative = false, bool block = false ) const;
00146 virtual const Base *bufferedManipulator() const { return this; }
00147 };
00148
00149
00157 class FALCON_DYN_CLASS Static: public Byte
00158 {
00159 public:
00160 virtual ~Static() {}
00161 virtual t_type type() const { return cs_static; }
00162 virtual uint32 charSize() const { return 1; }
00163
00164 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const;
00165 virtual void insert( String *str, uint32 pos, uint32 len, const String *source ) const;
00166 virtual void remove( String *str, uint32 pos, uint32 len ) const;
00167 virtual void destroy( String *str ) const;
00168
00169 virtual void reserve( String *str, uint32 size, bool relative = false, bool block = false ) const;
00170 virtual void shrink( String *str ) const;
00171 virtual const Base *bufferedManipulator() const;
00172 };
00173
00174
00183 class FALCON_DYN_CLASS Buffer: public Byte
00184 {
00185 public:
00186 virtual ~Buffer() {}
00187 virtual t_type type() const { return cs_buffer; }
00188 virtual uint32 charSize() const { return 1; }
00189
00190 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const;
00191 virtual void insert( String *str, uint32 pos, uint32 len, const String *source ) const;
00192 virtual void destroy( String *str ) const;
00193 virtual void reserve( String *str, uint32 size, bool relative = false, bool block = false ) const;
00194 virtual void shrink( String *str ) const;
00195
00196 };
00197
00198 class FALCON_DYN_CLASS Static16: public Static
00199 {
00200 public:
00201 virtual ~Static16() {}
00202 virtual uint32 charSize() const { return 2; }
00203 virtual uint32 length( const String *str ) const;
00204 virtual uint32 getCharAt( const String *str, uint32 pos ) const;
00205 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const;
00206 virtual void remove( String *str, uint32 pos, uint32 len ) const;
00207 virtual void reserve( String *str, uint32 size, bool relative = false, bool block = false ) const;
00208 virtual const Base *bufferedManipulator() const;
00209 };
00210
00211 class FALCON_DYN_CLASS Static32: public Static16
00212 {
00213 public:
00214 virtual ~Static32() {}
00215 virtual uint32 charSize() const { return 4; }
00216 virtual uint32 length( const String *str ) const;
00217 virtual uint32 getCharAt( const String *str, uint32 pos ) const;
00218 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const;
00219 virtual void remove( String *str, uint32 pos, uint32 len ) const;
00220 virtual void reserve( String *str, uint32 size, bool relative = false, bool block = false ) const;
00221 virtual const Base *bufferedManipulator() const;
00222 };
00223
00224 class FALCON_DYN_CLASS Buffer16: public Buffer
00225 {
00226 public:
00227 virtual uint32 charSize() const { return 2; }
00228 virtual uint32 length( const String *str ) const;
00229 virtual uint32 getCharAt( const String *str, uint32 pos ) const;
00230 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const;
00231 };
00232
00233 class FALCON_DYN_CLASS Buffer32: public Buffer16
00234 {
00235 public:
00236 virtual ~Buffer32() {}
00237 virtual uint32 charSize() const { return 4; }
00238 virtual uint32 length( const String *str ) const;
00239 virtual uint32 getCharAt( const String *str, uint32 pos ) const;
00240 virtual void setCharAt( String *str, uint32 pos, uint32 chr ) const;
00241 };
00242
00243 extern FALCON_DYN_SYM Static handler_static;
00244 extern FALCON_DYN_SYM Buffer handler_buffer;
00245 extern FALCON_DYN_SYM Static16 handler_static16;
00246 extern FALCON_DYN_SYM Buffer16 handler_buffer16;
00247 extern FALCON_DYN_SYM Static32 handler_static32;
00248 extern FALCON_DYN_SYM Buffer32 handler_buffer32;
00249
00250 }
00251
00282 class FALCON_DYN_CLASS String: public GCAlloc
00283 {
00284
00285 friend class csh::Base;
00286 friend class csh::Byte;
00287 friend class csh::Static;
00288 friend class csh::Buffer;
00289 friend class csh::Static16;
00290 friend class csh::Buffer16;
00291 friend class csh::Static32;
00292 friend class csh::Buffer32;
00293
00294 protected:
00295 const csh::Base *m_class;
00296 uint32 m_allocated;
00297 uint32 m_size;
00298 uint32 m_id;
00299
00300 byte *m_storage;
00301
00305 bool m_bExported;
00306
00307
00308 byte m_bFlags;
00309
00310 bool m_bCore;
00311
00317 explicit String( csh::Base *cl ) :
00318 m_class( cl )
00319 {}
00320
00321 public:
00322
00323 enum constants {
00324 npos = csh::npos,
00325 no_id = 0xFFFFFFFF
00326 };
00327
00328
00335 String():
00336 m_class( &csh::handler_static ),
00337 m_allocated( 0 ),
00338 m_size( 0 ),
00339 m_id( no_id ),
00340 m_storage( 0 ),
00341 m_bExported( false ),
00342 m_bCore( false )
00343 {
00344 }
00345
00346
00371 String( const char *data );
00372
00398 String( const wchar_t *data );
00399
00400
00414 String( const char *data, int32 len );
00415
00429 String( const wchar_t *data, int32 len );
00430
00431
00434 explicit String( uint32 prealloc );
00435
00448 String( const String &other ):
00449 m_allocated( 0 ),
00450 m_id( no_id ),
00451 m_bExported( false ),
00452 m_bCore( false )
00453 {
00454 copy( other );
00455 }
00456
00457
00471 String( const String &other, uint32 begin, uint32 end = csh::npos );
00472
00477 ~String()
00478 {
00479 m_class->destroy( this );
00480 }
00481
00488 void copy( const String &other );
00489
00499 String &bufferize( const String &other );
00500
00504 String &bufferize();
00505
00521 String &adopt( char *buffer, uint32 size, uint32 allocated );
00522
00538 String &adopt( wchar_t *buffer, uint32 size, uint32 allocated );
00539
00540
00546 const csh::Base *manipulator() const { return m_class; }
00547
00554 void manipulator( csh::Base *m ) { m_class = m; }
00555
00561 csh::t_type type() const { return m_class->type(); }
00562
00568 uint32 allocated() const { return m_allocated; }
00569
00570
00576 void allocated( uint32 s ) { m_allocated = s; }
00577
00581 uint32 size() const { return m_size; }
00585 void size( uint32 s ) { m_size = s; }
00586
00591 byte *getRawStorage() const { return m_storage; }
00592
00596 void setRawStorage( byte *b ) { m_storage = b; }
00597
00601 void setRawStorage( byte *b, int size ) {
00602 m_storage = b;
00603 m_size = size;
00604 m_allocated = size;
00605 }
00606
00614 uint32 length() const { return m_class->length( this ); }
00615
00635 uint32 toCString( char *target, uint32 bufsize ) const;
00636
00653 uint32 toWideString( wchar_t *target, uint32 bufsize ) const;
00654
00666 void shrink() { m_class->shrink( this ); }
00667
00668
00669 uint32 id() const { return m_id; }
00670 void id( uint32 val ) { m_id = val; }
00671
00672 uint32 getCharAt( uint32 pos ) const { return m_class->getCharAt( this, pos ); }
00673 void setCharAt( uint32 pos, uint32 chr ) { m_class->setCharAt( this, pos, chr ); }
00674 String subString( int32 start, int32 end ) const { return String( *this, start, end ); }
00675 String subString( int32 start ) const { return String( *this, start, length() ); }
00676 bool change( int32 start, const String &other ) {
00677 return m_class->change( this, start, csh::npos, &other );
00678 }
00679 bool change( int32 start, int32 end, const String &other ) {
00680 return m_class->change( this, start, end, &other );
00681 }
00682 void insert( uint32 pos, uint32 len, const String &source ) { m_class->insert( this, pos, len, &source ); }
00683 void remove( uint32 pos, uint32 len ) { m_class->remove( this, pos, len ); }
00684 void append( const String &source );
00685 void append( uint32 chr );
00686 void prepend( uint32 chr );
00687
00688 void prepend( const String &source ) { m_class->insert( this, 0, 0, &source ); }
00689
00690 uint32 find( const String &element, uint32 start=0, uint32 end=csh::npos) const
00691 {
00692 return m_class->find( this, &element, start, end );
00693 }
00694
00695 uint32 rfind( const String &element, uint32 start=0, uint32 end=csh::npos) const
00696 {
00697 return m_class->rfind( this, &element, start, end );
00698 }
00699
00706 int compare( const char *other ) const;
00707
00714 int compare( const wchar_t *other ) const ;
00715
00723 int compare( const String &other ) const;
00724
00738 int compareIgnoreCase( const String &other ) const;
00739
00746 int compareIgnoreCase( const char *other ) const;
00747
00754 int compareIgnoreCase( const wchar_t *other ) const;
00755
00757 bool operator !() { return m_size == 0; }
00758
00759 String & operator+=( const String &other ) { append( other ); return *this; }
00760 String & operator+=( uint32 other ) { append( other ); return *this; }
00761 String & operator+=( char other ) { append( (uint32) other ); return *this; }
00762 String & operator+=( const char *other ) { append( String( other ) ); return *this; }
00763 String & operator+=( wchar_t other ) { append( (uint32) other ); return *this; }
00764 String & operator+=( const wchar_t *other ) { append( String( other ) ); return *this; }
00765
00766 String & operator=( const String &other ) {
00767 copy( other );
00768 return *this;
00769 }
00770
00771 String & operator=( uint32 chr ) {
00772 m_size = 0;
00773 append( chr );
00774 return *this;
00775 }
00776
00786 String & operator=( const char *other ) {
00787 if ( m_storage != 0 )
00788 m_class->destroy( this );
00789 copy( String( other ) );
00790 return *this;
00791 }
00792
00800 bool less( const String &other ) const { return compare( other ) < 0; }
00801
00808 void serialize( Stream *out ) const;
00809
00826 bool deserialize( Stream *in, bool bStatic=false );
00827
00840 void escape( String &target ) const;
00841
00855 void escapeFull( String &target ) const;
00856
00867 void unescape();
00868
00872 void unescape( String &other ) const
00873 {
00874 other = *this;
00875 other.unescape();
00876 }
00877
00885 bool parseInt( int64 &target, uint32 pos = 0 ) const;
00886
00894 bool parseOctal( uint64 &target, uint32 pos = 0 ) const;
00895
00903 bool parseBin( uint64 &target, uint32 pos = 0 ) const;
00904
00912 bool parseHex( uint64 &target, uint32 pos = 0 ) const;
00913
00922 bool parseDouble( double &target, uint32 pos = 0 ) const;
00923
00924
00928 void writeNumber( int64 number );
00929
00935 void writeNumberHex( uint64 number, bool uppercase = true );
00936
00941 void writeNumberOctal( uint64 number );
00942
00948 void writeNumber( double number )
00949 {
00950 writeNumber( number, "%e" );
00951 }
00952
00965 void writeNumber( double number, const String &format );
00966
00967 void writeNumber( int64 number, const String &format );
00968
00974 inline String& N( int64 number )
00975 {
00976 writeNumber( number );
00977 return *this;
00978 }
00979
00985 inline String& N( int32 number )
00986 {
00987 writeNumber( (int64) number );
00988 return *this;
00989 }
00990
00996 inline String& N( int64 number, const String& format )
00997 {
00998 writeNumber( (int64) number, format );
00999 return *this;
01000 }
01001
01007 inline String& N( double number )
01008 {
01009 writeNumber( number );
01010 return *this;
01011 }
01012
01018 inline String& N( double number, const String& format )
01019 {
01020 writeNumber( number, format );
01021 return *this;
01022 }
01023
01030 inline String& A( const String& str ) { append(str); return *this; }
01031
01037 inline String& A( int chr ) { append((uint32)chr); return *this; }
01038
01044 inline String& A( uint32 chr ) { append(chr); return *this; }
01045
01052 bool checkPosBound( int32 &pos )
01053 {
01054 register int s = length();
01055 if ( pos < 0 )
01056 pos = s + pos;
01057 if ( pos < 0 || pos >= s )
01058 return false;
01059 return true;
01060 }
01061
01069 bool checkRangeBound( int32 &begin, int32 &end )
01070 {
01071 register int s = length();
01072 if ( begin < 0 )
01073 begin = s + begin;
01074 if ( begin < 0 || begin >= s )
01075 return false;
01076 if ( end < 0 )
01077 end = s + end;
01078
01079
01080 if ( end < 0 || end > s )
01081 return false;
01082 return true;
01083 }
01084
01091 void reserve( uint32 size )
01092 {
01093 m_class->reserve( this, size );
01094 }
01095
01105 void trim( int mode );
01106 void trim() { trim( 0 ); }
01107
01113 void frontTrim() { trim( 1 ); }
01114 void backTrim() { trim( 2 ); }
01115
01119 void lower();
01120
01124 void upper();
01125
01126 bool isStatic() const {
01127 return manipulator()->type() == csh::cs_static ||
01128 manipulator()->type() == csh::cs_static16 ||
01129 manipulator()->type() == csh::cs_static32;
01130 }
01131
01148 bool fromUTF8( const char *utf8 );
01149
01159 const uint32 operator []( uint32 pos ) const { return getCharAt( pos ); }
01160
01164 bool exported() const { return m_bExported; }
01167 void exported( bool e ) { m_bExported = e; }
01168
01176 void c_ize();
01177
01185 bool startsWith( const String &str, bool icase=false ) const;
01186
01194 bool endsWith( const String &str, bool icase=false ) const;
01195
01196
01218 bool setCharSize( uint32 nsize, uint32 subst=0xFFFFFFFF );
01219
01220 void writeIndex( const Item &index, const Item &target );
01221 void readIndex( const Item &index, Item &target );
01222 void readProperty( const String &prop, Item &item );
01223
01224 bool isCore() const { return m_bCore; }
01225
01226 static void uint32ToHex( uint32 number, char *buffer );
01227 static bool isWhiteSpace( uint32 chr )
01228 {
01229 return chr == ' ' || chr == '\t' || chr == '\r' || chr == '\n';
01230 }
01231 };
01232
01233
01235 inline bool operator == ( const String &str1, const String &str2 ) { return str1.compare( str2 ) == 0; }
01236 inline bool operator == ( const String &str1, const char *str2 ) { return str1.compare( str2 ) == 0; }
01237 inline bool operator == ( const String &str1, const wchar_t *str2 ) { return str1.compare( str2 ) == 0; }
01238 inline bool operator != ( const String &str1, const String &str2 ) { return str1.compare( str2 ) != 0; }
01239 inline bool operator != ( const String &str1, const char *str2 ) { return str1.compare( str2 ) != 0; }
01240 inline bool operator != ( const String &str1, const wchar_t *str2 ) { return str1.compare( str2 ) != 0; }
01241 inline bool operator > ( const String &str1, const String &str2 ) { return str1.compare( str2 ) > 0; }
01242 inline bool operator > ( const String &str1, const wchar_t *str2 ) { return str1.compare( str2 ) > 0; }
01243 inline bool operator > ( const String &str1, const char *str2 ) { return str1.compare( str2 ) > 0; }
01244 inline bool operator < ( const String &str1, const String &str2 ) { return str1.compare( str2 ) < 0; }
01245 inline bool operator < ( const String &str1, const char *str2 ) { return str1.compare( str2 ) < 0; }
01246 inline bool operator < ( const String &str1, const wchar_t *str2 ) { return str1.compare( str2 ) < 0; }
01247 inline bool operator >= ( const String &str1, const String &str2 ) { return str1.compare( str2 ) >= 0; }
01248 inline bool operator >= ( const String &str1, const char *str2 ) { return str1.compare( str2 ) >= 0; }
01249 inline bool operator >= ( const String &str1, const wchar_t *str2 ) { return str1.compare( str2 ) >= 0; }
01250 inline bool operator <= ( const String &str1, const String &str2 ) { return str1.compare( str2 ) <= 0; }
01251 inline bool operator <= ( const String &str1, const char *str2 ) { return str1.compare( str2 ) <= 0; }
01252 inline bool operator <= ( const String &str1, const wchar_t *str2 ) { return str1.compare( str2 ) <= 0; }
01253
01254 inline String operator +( const String &str1, const String &str2 )
01255 { String str3; str3.append( str1 ); str3.append( str2); return str3; }
01256 inline String operator +( const char *str1, const String &str2 )
01257 { String str3; str3.append( str1 ); str3.append( str2); return str3; }
01258 inline String operator +( const wchar_t *str1, const String &str2 )
01259 { String str3; str3.append( str1 ); str3.append( str2); return str3; }
01260 inline String operator +( const String &str1, const char *str2 )
01261 { String str3; str3.append( str1 ); str3.append( str2); return str3; }
01262 inline String operator +( const String &str1, const wchar_t *str2 )
01263 { String str3; str3.append( str1 ); str3.append( str2); return str3; }
01264
01266 class StringPtrCmp
01267 {
01268 public:
01269 bool operator() ( const String *s1, const String *s2 ) const
01270 { return s1->compare( *s2 ) < 0; }
01271 };
01272
01273
01274
01275
01276
// Defined in another translation unit.
// NOTE(review): presumably a type-erased disposal callback for String
// objects passed as void* -- confirm against the definition.
void string_deletor( void *data );
01278
01279 class CoreString;
01280
01281 class FALCON_DYN_CLASS StringGarbage: public Garbageable
01282 {
01283 CoreString *m_str;
01284
01285 public:
01286 StringGarbage( CoreString *owner ):
01287 Garbageable(),
01288 m_str( owner )
01289 {}
01290
01291 virtual ~StringGarbage();
01292 virtual bool finalize();
01293 };
01294
01300 class FALCON_DYN_CLASS CoreString: public String
01301 {
01302 StringGarbage m_gcptr;
01303
01304 public:
01305 CoreString():
01306 String(),
01307 m_gcptr( this )
01308 {
01309 m_bCore = true;
01310 }
01311
01312 CoreString( const String &str ):
01313 String( str ),
01314 m_gcptr( this )
01315 {
01316 m_bCore = true;
01317 }
01318
01319 CoreString( const CoreString &str ):
01320 String( str ),
01321 m_gcptr( this )
01322 {
01323 m_bCore = true;
01324 }
01325
01326 CoreString( const char *data ):
01327 String( data ),
01328 m_gcptr( this )
01329 {
01330 m_bCore = true;
01331 }
01332
01333 CoreString( const wchar_t *data ):
01334 String( data ),
01335 m_gcptr( this )
01336 {
01337 m_bCore = true;
01338 }
01339
01340 CoreString( const char *data, int32 len ):
01341 String( data, len ),
01342 m_gcptr( this )
01343 {
01344 m_bCore = true;
01345 }
01346
01347 CoreString( const wchar_t *data, int32 len ):
01348 String( data, len ),
01349 m_gcptr( this )
01350 {
01351 m_bCore = true;
01352 }
01353
01354
01357 explicit CoreString( uint32 prealloc ):
01358 String( prealloc ),
01359 m_gcptr( this )
01360 {
01361 m_bCore = true;
01362 }
01363
01364 CoreString( const String &other, uint32 begin, uint32 end = csh::npos ):
01365 String( other, begin, end ),
01366 m_gcptr( this )
01367 {
01368 m_bCore = true;
01369 }
01370
01371 const StringGarbage &garbage() const { return m_gcptr; }
01372
01373 StringGarbage &garbage() { return m_gcptr; }
01374
01375 void mark( uint32 m ) { m_gcptr.mark( m ); }
01376
01377 CoreString & operator+=( const CoreString &other ) { append( other ); return *this; }
01378 CoreString & operator+=( const String &other ) { append( other ); return *this; }
01379 CoreString & operator+=( uint32 other ) { append( other ); return *this; }
01380 CoreString & operator+=( char other ) { append( (uint32) other ); return *this; }
01381 CoreString & operator+=( const char *other ) { append( String( other ) ); return *this; }
01382 CoreString & operator+=( wchar_t other ) { append( (uint32) other ); return *this; }
01383 CoreString & operator+=( const wchar_t *other ) { append( String( other ) ); return *this; }
01384
01385 CoreString & operator=( const CoreString &other ) {
01386 copy( other );
01387 return *this;
01388 }
01389
01390 CoreString & operator=( const String &other ) {
01391 copy( other );
01392 return *this;
01393 }
01394
01395 CoreString & operator=( uint32 chr ) {
01396 m_size = 0;
01397 append( chr );
01398 return *this;
01399 }
01400
01401
01402 CoreString & operator=( const char *other ) {
01403 if ( m_storage != 0 )
01404 m_class->destroy( this );
01405 copy( String( other ) );
01406 return *this;
01407 }
01408 };
01409
01410
01411
01412
01413
01414
01427 inline CoreString *UTF8String( const char *utf8 )
01428 {
01429 CoreString *str = new CoreString;
01430 str->fromUTF8( utf8 );
01431 return str;
01432 }
01433
01434 }
01435
01436 #endif
01437
01438
01439