KiCad PCB EDA Suite
Loading...
Searching...
No Matches
diptrace_binary_reader.cpp
Go to the documentation of this file.
1/*
2 * This program source code file is part of KiCad, a free EDA CAD application.
3 *
4 * Copyright The KiCad Developers, see AUTHORS.txt for contributors.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <https://www.gnu.org/licenses/>.
18 */
19
21
22#include <algorithm>
23#include <cstdio>
24#include <cstring>
25
26#include <ki_exception.h>
27#include <wx/filename.h>
28#include <wx/translation.h>
29
30
31using namespace DIPTRACE;
32
33
34BINARY_READER::BINARY_READER( const wxString& aFileName ) :
35 m_offset( 0 ),
36 m_version( 0 ),
38{
39 FILE* fp = wxFopen( aFileName, wxT( "rb" ) );
40
41 if( fp == nullptr )
42 {
43 THROW_IO_ERROR( wxString::Format( _( "Cannot open file '%s'." ), aFileName ) );
44 }
45
46 fseek( fp, 0, SEEK_END );
47 long len = ftell( fp );
48
49 if( len < 0 )
50 {
51 fclose( fp );
53 wxString::Format( _( "Cannot determine length of file '%s'." ), aFileName ) );
54 }
55
56 // Reject absurd sizes before allocating so a corrupt or hostile length cannot drive an
57 // arbitrarily large allocation; real DipTrace files are a few MB.
58 static constexpr long MAX_FILE_SIZE = 1L << 30; // 1 GiB
59
60 if( len > MAX_FILE_SIZE )
61 {
62 fclose( fp );
63 THROW_IO_ERROR( wxString::Format( _( "DipTrace file '%s' is too large (%ld bytes)." ),
64 aFileName, len ) );
65 }
66
67 // resize() can throw (bad_alloc/length_error); close the handle on that path too.
68 try
69 {
70 m_data.resize( static_cast<size_t>( len ) );
71 }
72 catch( ... )
73 {
74 fclose( fp );
75 throw;
76 }
77
78 fseek( fp, 0, SEEK_SET );
79
80 size_t bytesRead = fread( m_data.data(), 1, m_data.size(), fp );
81 fclose( fp );
82
83 if( bytesRead != m_data.size() )
84 {
85 THROW_IO_ERROR( wxString::Format( _( "Error reading file '%s'." ), aFileName ) );
86 }
87}
88
89
93
94
95// --- Position management ----------------------------------------------------
96
97
98void BINARY_READER::SetOffset( size_t aOffset )
99{
100 if( aOffset > m_data.size() )
101 {
102 THROW_IO_ERROR( wxString::Format( _( "Seek past end of file (offset %zu, size %zu)." ),
103 aOffset, m_data.size() ) );
104 }
105
106 m_offset = aOffset;
107}
108
109
110void BINARY_READER::Skip( size_t aBytes )
111{
112 // Wrap-safe: m_offset <= size is an invariant, so size - m_offset never underflows; writing it
113 // as m_offset + aBytes would wrap for an attacker-sized count and pass the check.
114 if( aBytes > m_data.size() - m_offset )
115 ThrowEOFError( aBytes );
116
117 m_offset += aBytes;
118}
119
120
121// --- Primitive readers ------------------------------------------------------
122
123
125{
126 if( m_offset + 1 > m_data.size() )
127 ThrowEOFError( 1 );
128
129 uint8_t val = m_data[m_offset];
130 m_offset += 1;
131 return val;
132}
133
134
136{
137 if( m_offset + 3 > m_data.size() )
138 ThrowEOFError( 3 );
139
140 const uint8_t* p = &m_data[m_offset];
141 int raw = ( static_cast<int>( p[0] ) << 16 )
142 | ( static_cast<int>( p[1] ) << 8 )
143 | ( static_cast<int>( p[2] ) );
144 m_offset += 3;
145 return raw - INT3_BIAS;
146}
147
148
150{
151 if( m_offset + 4 > m_data.size() )
152 ThrowEOFError( 4 );
153
154 const uint8_t* p = &m_data[m_offset];
155 unsigned int raw = ( static_cast<unsigned int>( p[0] ) << 24 )
156 | ( static_cast<unsigned int>( p[1] ) << 16 )
157 | ( static_cast<unsigned int>( p[2] ) << 8 )
158 | ( static_cast<unsigned int>( p[3] ) );
159 m_offset += 4;
160
161 // Subtract in int64 so a raw value with the high bit set (>= 2^31) cannot overflow the
162 // intermediate signed int before the bias is applied.
163 return static_cast<int>( static_cast<int64_t>( raw ) - INT4_BIAS );
164}
165
166
168{
170 return ReadStringUTF16();
171
174 {
175 return ReadStringASCII();
176 }
177
178 return ReadStringUTF16();
179}
180
181
182void BINARY_READER::ReadColor( uint8_t& r, uint8_t& g, uint8_t& b )
183{
184 r = ReadByte();
185 g = ReadByte();
186 b = ReadByte();
187}
188
189
190void BINARY_READER::ReadBytes( uint8_t* aDst, size_t aCount )
191{
192 // Wrap-safe (m_offset <= size invariant); m_offset + aCount would wrap for a huge count.
193 if( aCount > m_data.size() - m_offset )
194 ThrowEOFError( aCount );
195
196 std::memcpy( aDst, &m_data[m_offset], aCount );
197 m_offset += aCount;
198}
199
200
201// --- Peek methods -----------------------------------------------------------
202
203
205{
206 if( m_offset + 3 > m_data.size() )
207 {
208 THROW_IO_ERROR( wxString::Format(
209 _( "Unexpected end of file at offset 0x%06zX: need 3 bytes for int3, "
210 "have %zu remaining." ),
211 m_offset, m_data.size() - m_offset ) );
212 }
213
214 const uint8_t* p = &m_data[m_offset];
215 int raw = ( static_cast<int>( p[0] ) << 16 )
216 | ( static_cast<int>( p[1] ) << 8 )
217 | ( static_cast<int>( p[2] ) );
218 return raw - INT3_BIAS;
219}
220
221
223{
224 if( m_offset + 4 > m_data.size() )
225 {
226 THROW_IO_ERROR( wxString::Format(
227 _( "Unexpected end of file at offset 0x%06zX: need 4 bytes for int4, "
228 "have %zu remaining." ),
229 m_offset, m_data.size() - m_offset ) );
230 }
231
232 const uint8_t* p = &m_data[m_offset];
233 unsigned int raw = ( static_cast<unsigned int>( p[0] ) << 24 )
234 | ( static_cast<unsigned int>( p[1] ) << 16 )
235 | ( static_cast<unsigned int>( p[2] ) << 8 )
236 | ( static_cast<unsigned int>( p[3] ) );
237
238 // Subtract in int64 so a raw value with the high bit set cannot overflow the intermediate int.
239 return static_cast<int>( static_cast<int64_t>( raw ) - INT4_BIAS );
240}
241
242
244{
245 if( m_offset >= m_data.size() )
246 {
247 THROW_IO_ERROR( wxString::Format(
248 _( "Unexpected end of file at offset 0x%06zX: need 1 byte." ), m_offset ) );
249 }
250
251 return m_data[m_offset];
252}
253
254
255// --- Coordinate conversion --------------------------------------------------
256
257
258int BINARY_READER::DipTraceToKiCadNm( int aDipTraceCoord )
259{
260 return static_cast<int>( static_cast<int64_t>( aDipTraceCoord ) * 100 / 3 );
261}
262
263
264double BINARY_READER::DipTraceToMM( int aDipTraceCoord )
265{
266 return static_cast<double>( aDipTraceCoord ) * DIPTRACE_COORD_TO_MM;
267}
268
269
270// --- Search helpers ---------------------------------------------------------
271
272
273size_t BINARY_READER::FindPattern( const uint8_t* aPattern, size_t aPatternLen,
274 size_t aStart, size_t aEnd ) const
275{
276 if( aEnd == 0 || aEnd > m_data.size() )
277 aEnd = m_data.size();
278
279 if( aStart >= aEnd || aPatternLen == 0 || aPatternLen > ( aEnd - aStart ) )
280 return std::string::npos;
281
282 auto it = std::search( m_data.begin() + aStart,
283 m_data.begin() + aEnd,
284 aPattern,
285 aPattern + aPatternLen );
286
287 if( it == m_data.begin() + aEnd )
288 return std::string::npos;
289
290 return static_cast<size_t>( it - m_data.begin() );
291}
292
293
294size_t BINARY_READER::FindString( const wxString& aStr, size_t aStart, size_t aEnd ) const
295{
296 if( aStr.IsEmpty() )
297 return std::string::npos;
298
299 // Encode the string as UTF-16-BE, which is the v39+ on-disk representation.
300 // The on-disk format has a 2-byte length prefix before the encoded characters.
301 wxMBConvUTF16BE conv;
302
303 // wxMBConvUTF16BE::FromWChar includes a BOM; we must skip it.
304 // We encode manually: each wxChar becomes 2 bytes in UTF-16-BE.
305 size_t charCount = aStr.length();
306 std::vector<uint8_t> encoded( charCount * 2 );
307
308 for( size_t i = 0; i < charCount; i++ )
309 {
310 wxChar ch = aStr[i];
311 encoded[i * 2] = static_cast<uint8_t>( ( ch >> 8 ) & 0xFF );
312 encoded[i * 2 + 1] = static_cast<uint8_t>( ch & 0xFF );
313 }
314
315 // Search for the encoded character data in the file buffer.
316 size_t matchPos = FindPattern( encoded.data(), encoded.size(), aStart, aEnd );
317
318 if( matchPos == std::string::npos )
319 return std::string::npos;
320
321 // The length prefix sits 2 bytes before the encoded character data.
322 if( matchPos < 2 )
323 return std::string::npos;
324
325 return matchPos - 2;
326}
327
328
329// --- Try-read methods -------------------------------------------------------
330
331
332bool BINARY_READER::TryReadString( wxString& aResult )
333{
335 return TryReadStringUTF16( aResult );
336
339 {
340 return TryReadStringASCII( aResult );
341 }
342
343 return TryReadStringUTF16( aResult );
344}
345
346
347void BINARY_READER::DetectStringEncoding( size_t aProbeOffset )
348{
349 size_t savedOffset = m_offset;
350 STRING_ENCODING savedEncoding = m_stringEncoding;
351
352 // The probe helpers below dispatch on m_stringEncoding, so force each framing explicitly.
353 m_offset = aProbeOffset;
355 wxString asciiStr;
356 bool asciiOk = TryReadStringASCII( asciiStr ) && !asciiStr.IsEmpty();
357
358 m_offset = aProbeOffset;
360 wxString utf16Str;
361 bool utf16Ok = TryReadStringUTF16( utf16Str ) && !utf16Str.IsEmpty();
362
363 m_offset = savedOffset;
364 m_stringEncoding = savedEncoding;
365
366 // Only commit when exactly one framing yields a printable string; otherwise leave the
367 // version-based default in place.
368 if( asciiOk && !utf16Ok )
370 else if( utf16Ok && !asciiOk )
372}
373
374
375// --- Private string readers -------------------------------------------------
376
377
379{
380 if( m_offset + 2 > m_data.size() )
381 ThrowEOFError( 2 );
382
383 const uint8_t* p = &m_data[m_offset];
384 int charCount = ( static_cast<int>( p[0] ) << 8 ) | static_cast<int>( p[1] );
385 m_offset += 2;
386
387 if( charCount == 0 )
388 return wxString();
389
390 if( charCount < 0 || charCount > MAX_STRING_CHARS )
391 {
392 THROW_IO_ERROR( wxString::Format(
393 _( "Unreasonable string length %d at offset 0x%06zX." ),
394 charCount, m_offset - 2 ) );
395 }
396
397 size_t byteCount = static_cast<size_t>( charCount ) * 2;
398
399 if( m_offset + byteCount > m_data.size() )
400 ThrowEOFError( byteCount );
401
402 // wxMBConvUTF16BE converts from a big-endian byte stream.
403 wxMBConvUTF16BE conv;
404 wxString result = wxString( reinterpret_cast<const char*>( &m_data[m_offset] ),
405 conv, byteCount );
406
407 m_offset += byteCount;
408 return result;
409}
410
411
413{
414 int byteCount = ReadInt3();
415
416 if( byteCount == 0 )
417 return wxString();
418
419 if( byteCount < 0 || byteCount > MAX_STRING_CHARS )
420 {
421 THROW_IO_ERROR( wxString::Format(
422 _( "Unreasonable v37 string length %d at offset 0x%06zX." ),
423 byteCount, m_offset - 3 ) );
424 }
425
426 size_t count = static_cast<size_t>( byteCount );
427
428 if( m_offset + count > m_data.size() )
429 ThrowEOFError( count );
430
431 wxString result = wxString::From8BitData(
432 reinterpret_cast<const char*>( &m_data[m_offset] ), count );
433 m_offset += count;
434 return result;
435}
436
437
438bool BINARY_READER::TryReadStringUTF16( wxString& aResult )
439{
440 size_t savedOffset = m_offset;
441
442 if( m_offset + 2 > m_data.size() )
443 return false;
444
445 const uint8_t* p = &m_data[m_offset];
446 int charCount = ( static_cast<int>( p[0] ) << 8 ) | static_cast<int>( p[1] );
447 m_offset += 2;
448
449 if( charCount == 0 )
450 {
451 aResult = wxString();
452 return true;
453 }
454
455 if( charCount < 0 || charCount > 500 )
456 {
457 m_offset = savedOffset;
458 return false;
459 }
460
461 size_t byteCount = static_cast<size_t>( charCount ) * 2;
462
463 if( m_offset + byteCount > m_data.size() )
464 {
465 m_offset = savedOffset;
466 return false;
467 }
468
469 wxMBConvUTF16BE conv;
470 wxString candidate = wxString( reinterpret_cast<const char*>( &m_data[m_offset] ),
471 conv, byteCount );
472
473 if( !IsPrintableString( candidate ) )
474 {
475 m_offset = savedOffset;
476 return false;
477 }
478
479 m_offset += byteCount;
480 aResult = candidate;
481 return true;
482}
483
484
485bool BINARY_READER::TryReadStringASCII( wxString& aResult )
486{
487 size_t savedOffset = m_offset;
488
489 if( m_offset + 3 > m_data.size() )
490 return false;
491
492 const uint8_t* p = &m_data[m_offset];
493 int byteCount = ( static_cast<int>( p[0] ) << 16 )
494 | ( static_cast<int>( p[1] ) << 8 )
495 | ( static_cast<int>( p[2] ) );
496 byteCount -= INT3_BIAS;
497 m_offset += 3;
498
499 if( byteCount == 0 )
500 {
501 aResult = wxString();
502 return true;
503 }
504
505 if( byteCount < 0 || byteCount > 500 )
506 {
507 m_offset = savedOffset;
508 return false;
509 }
510
511 size_t count = static_cast<size_t>( byteCount );
512
513 if( m_offset + count > m_data.size() )
514 {
515 m_offset = savedOffset;
516 return false;
517 }
518
519 wxString candidate = wxString::From8BitData(
520 reinterpret_cast<const char*>( &m_data[m_offset] ), count );
521
522 if( !IsPrintableString( candidate ) )
523 {
524 m_offset = savedOffset;
525 return false;
526 }
527
528 m_offset += count;
529 aResult = candidate;
530 return true;
531}
532
533
534bool BINARY_READER::IsPrintableString( const wxString& aStr )
535{
536 for( size_t i = 0; i < aStr.length(); i++ )
537 {
538 wxChar ch = aStr[i];
539
540 if( ch == '\r' || ch == '\n' || ch == '\t' )
541 continue;
542
543 if( ch < 0x20 )
544 return false;
545 }
546
547 return true;
548}
549
550
551void BINARY_READER::ThrowEOFError( size_t aBytesNeeded ) const
552{
553 size_t remaining = ( m_offset < m_data.size() ) ? ( m_data.size() - m_offset ) : 0;
554
555 THROW_IO_ERROR( wxString::Format(
556 _( "Unexpected end of file at offset 0x%06zX: need %zu bytes, have %zu remaining." ),
557 m_offset, aBytesNeeded, remaining ) );
558}
bool TryReadStringUTF16(wxString &aResult)
Attempt to read a UTF-16-BE string with validation.
size_t FindString(const wxString &aStr, size_t aStart, size_t aEnd) const
Search for a UTF-16-BE encoded string in the file data, including its two-byte length prefix.
int PeekInt3() const
Peek at the next 3-byte biased integer without advancing the position.
wxString ReadStringASCII()
Read a v37 legacy ASCII string: int3(byte_count) + raw ASCII bytes.
bool TryReadStringASCII(wxString &aResult)
Attempt to read a legacy ASCII string with validation.
void ReadBytes(uint8_t *aDst, size_t aCount)
Read a block of raw bytes into the caller's buffer.
int m_version
DipTrace format version.
void Skip(size_t aBytes)
Advance the read position by the given number of bytes.
uint8_t ReadByte()
Read a single unsigned byte and advance the position by 1.
size_t m_offset
Current read position (byte offset).
uint8_t PeekByte() const
Peek at the next byte without advancing the position.
void ThrowEOFError(size_t aBytesNeeded) const
Throw IO_ERROR with a message indicating a read past end of file.
void DetectStringEncoding(size_t aProbeOffset)
Detect the string encoding from the bytes at aProbeOffset, which must sit at the start of a non-empty...
STRING_ENCODING m_stringEncoding
Explicit string encoding override.
static bool IsPrintableString(const wxString &aStr)
Verify that all characters in aStr are printable or common whitespace (space, tab,...
void ReadColor(uint8_t &r, uint8_t &g, uint8_t &b)
Read a 3-byte RGB color value.
int ReadInt4()
Read a 4-byte big-endian biased integer (bias 1,000,000,000) and advance the position by 4.
int ReadInt3()
Read a 3-byte big-endian biased integer (bias 1,000,000) and advance the position by 3.
static int DipTraceToKiCadNm(int aDipTraceCoord)
Convert a DipTrace coordinate value (1/30 000 mm units, i.e. 100/3 nm each) to KiCad nanometers.
size_t FindPattern(const uint8_t *aPattern, size_t aPatternLen, size_t aStart, size_t aEnd) const
Search for a byte pattern in the file data.
void SetOffset(size_t aOffset)
Set the read position to an absolute byte offset.
bool TryReadString(wxString &aResult)
Attempt to read a string at the current position.
wxString ReadStringUTF16()
Read a v39+ UTF-16-BE string: uint16-BE char count + UTF-16-BE data.
BINARY_READER(const wxString &aFileName)
Construct a reader by loading the given file into memory.
int PeekInt4() const
Peek at the next 4-byte biased integer without advancing the position.
static double DipTraceToMM(int aDipTraceCoord)
Convert a DipTrace coordinate value (1/30 000 mm units) to millimeters.
wxString ReadString()
Read a string using the configured encoding.
std::vector< uint8_t > m_data
Entire file contents loaded into memory.
#define _(s)
#define THROW_IO_ERROR(msg)
macro which captures the "call site" values of __FILE__, __FUNCTION__ & __LINE__
constexpr double DIPTRACE_COORD_TO_MM
DipTrace uses 762 units per mil (30 000 units per mm).
constexpr int MAX_STRING_CHARS
Maximum sane string length (in characters) accepted by the reader.
constexpr int INT4_BIAS
Bias value added to stored 4-byte unsigned integers.
constexpr int LEGACY_STRING_VERSION
Format version at or below which strings use the legacy ASCII encoding (int3 byte-count + raw ASCII b...
constexpr int INT3_BIAS
Bias value added to stored 3-byte unsigned integers.
wxString result
Test unit parsing edge cases and error handling.