KiCad PCB EDA Suite
Loading...
Searching...
No Matches
identity_reconciler.cpp
Go to the documentation of this file.
1/*
2 * This program source code file is part of KiCad, a free EDA CAD application.
3 *
4 * Copyright The KiCad Developers, see AUTHORS.txt for contributors.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 3
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you may find one here:
18 * http://www.gnu.org/licenses/gpl-3.0.html
19 */
20
22
23#include <algorithm>
24#include <cmath>
25
26
27namespace KICAD_DIFF
28{
29
30namespace
31{
32
33constexpr double POSITION_WEIGHT = 0.40;
34constexpr double BBOX_WEIGHT = 0.20;
35constexpr double KEYPROPS_WEIGHT = 0.40;
36
37
38bool positionsEqual( const VECTOR2I& aA, const VECTOR2I& aB, unsigned int aTolerance )
39{
40 if( aTolerance == 0 )
41 return aA == aB;
42
43 // Widen to 64-bit before subtracting: VECTOR2I components are int32 and
44 // coordinates near opposite extremes would overflow a 32-bit difference.
45 const int64_t tol = aTolerance;
46
47 return std::abs( static_cast<int64_t>( aA.x ) - aB.x ) <= tol
48 && std::abs( static_cast<int64_t>( aA.y ) - aB.y ) <= tol;
49}
50
51
52bool bboxesEqual( const BOX2I& aA, const BOX2I& aB, unsigned int aTolerance )
53{
54 if( aTolerance == 0 )
55 return aA == aB;
56
57 return positionsEqual( aA.GetOrigin(), aB.GetOrigin(), aTolerance )
58 && positionsEqual( aA.GetEnd(), aB.GetEnd(), aTolerance );
59}
60
61
70double keyPropOverlap( const std::vector<std::pair<wxString, std::string>>& aA,
71 const std::vector<std::pair<wxString, std::string>>& aB )
72{
73 // Absent identifying properties supply no positive evidence — fall back to
74 // position/bbox alone. Returning 1.0 here would let two generic items at
75 // the same position false-match at the maximum possible score.
76 if( aA.empty() || aB.empty() )
77 return 0.0;
78
79 std::size_t matches = 0;
80
81 for( const auto& [name, value] : aA )
82 {
83 auto it = std::find_if( aB.begin(), aB.end(),
84 [&]( const std::pair<wxString, std::string>& aPair )
85 {
86 return aPair.first == name && aPair.second == value;
87 } );
88
89 if( it != aB.end() )
90 ++matches;
91 }
92
93 std::size_t denom = std::max( aA.size(), aB.size() );
94 return static_cast<double>( matches ) / static_cast<double>( denom );
95}
96
97} // namespace
98
99
101 const ITEM_DESCRIPTOR& aB ) const
102{
103 if( aA.type != aB.type )
104 return 0.0;
105
106 double score = 0.0;
107
108 if( positionsEqual( aA.position, aB.position, m_config.positionTolerance ) )
109 score += POSITION_WEIGHT;
110
111 if( bboxesEqual( aA.bbox, aB.bbox, m_config.positionTolerance ) )
112 score += BBOX_WEIGHT;
113
114 score += keyPropOverlap( aA.keyProps, aB.keyProps ) * KEYPROPS_WEIGHT;
115
116 return score;
117}
118
119
120RECONCILIATION IDENTITY_RECONCILER::Reconcile( const std::vector<ITEM_DESCRIPTOR>& aA,
121 const std::vector<ITEM_DESCRIPTOR>& aB ) const
122{
124
125 // Index A and B by KIID_PATH. Track duplicates while building the indices.
126 std::map<KIID_PATH, std::size_t> aIndex;
127 std::map<KIID_PATH, std::size_t> bIndex;
128 std::set<KIID_PATH> aSeen;
129 std::set<KIID_PATH> bSeen;
130
131 for( std::size_t i = 0; i < aA.size(); ++i )
132 {
133 const KIID_PATH& id = aA[i].id;
134
135 if( aSeen.count( id ) )
136 {
137 if( m_config.detectDuplicates
138 && std::find( result.duplicatesA.begin(), result.duplicatesA.end(), id )
139 == result.duplicatesA.end() )
140 {
141 result.duplicatesA.push_back( id );
142 }
143
144 continue; // first-seen wins for indexing
145 }
146
147 aSeen.insert( id );
148 aIndex[id] = i;
149 }
150
151 for( std::size_t i = 0; i < aB.size(); ++i )
152 {
153 const KIID_PATH& id = aB[i].id;
154
155 if( bSeen.count( id ) )
156 {
157 if( m_config.detectDuplicates
158 && std::find( result.duplicatesB.begin(), result.duplicatesB.end(), id )
159 == result.duplicatesB.end() )
160 {
161 result.duplicatesB.push_back( id );
162 }
163
164 continue;
165 }
166
167 bSeen.insert( id );
168 bIndex[id] = i;
169 }
170
171 // Items with duplicate KIID_PATHs are reported separately and excluded
172 // from normal matching — once identity is ambiguous, any "match" is
173 // arbitrary and would mask the real problem from the user.
174 for( const KIID_PATH& dup : result.duplicatesA )
175 aIndex.erase( dup );
176
177 for( const KIID_PATH& dup : result.duplicatesB )
178 bIndex.erase( dup );
179
180 // Pass 1: KIID_PATH direct matches.
181 std::set<std::size_t> matchedB;
182
183 for( const auto& [idA, indexA] : aIndex )
184 {
185 auto it = bIndex.find( idA );
186
187 if( it == bIndex.end() )
188 continue;
189
190 // Same KIID_PATH in both sides — direct match, even if other properties differ.
191 result.aToB[idA] = idA;
192 result.bToA[idA] = idA;
193 matchedB.insert( it->second );
194 }
195
196 // Pass 2: similarity fallback for unmatched items.
197 if( m_config.enableSimilarity )
198 {
199 // Collect unmatched A and B items
200 std::vector<std::size_t> unmatchedA;
201
202 for( const auto& [idA, indexA] : aIndex )
203 {
204 if( result.aToB.find( idA ) == result.aToB.end() )
205 unmatchedA.push_back( indexA );
206 }
207
208 std::vector<std::size_t> unmatchedB;
209
210 for( const auto& [idB, indexB] : bIndex )
211 {
212 if( matchedB.count( indexB ) == 0 )
213 unmatchedB.push_back( indexB );
214 }
215
216 // Score every cross pair and greedily take the best ones above threshold.
217 // Deterministic order: sort candidate pairs by (score desc, A index asc, B index asc).
218 struct Candidate
219 {
220 double score;
221 std::size_t aIdx;
222 std::size_t bIdx;
223 };
224
225 // Bucket B by type first so the cross-product only walks type-compatible
226 // pairs. ScoreSimilarity returns 0 for mismatched types anyway, but
227 // building those zero scores is the dominant cost at scale (5000 PCB
228 // items can produce 25M zero-score evaluations).
229 std::map<wxString, std::vector<std::size_t>> bByType;
230
231 for( std::size_t bIdx : unmatchedB )
232 bByType[aB[bIdx].type].push_back( bIdx );
233
234 // Candidates is grown without pre-reserve: a pessimistic
235 // |A|*|B| reserve was 600 MB at the plan budget (5000 items).
236 // Most pairs fail the type filter or the threshold; the vector
237 // ends up tiny in practice.
238 std::vector<Candidate> candidates;
239
240 for( std::size_t aIdx : unmatchedA )
241 {
242 auto it = bByType.find( aA[aIdx].type );
243
244 if( it == bByType.end() )
245 continue;
246
247 for( std::size_t bIdx : it->second )
248 {
249 double s = ScoreSimilarity( aA[aIdx], aB[bIdx] );
250
251 if( s >= m_config.similarityThreshold )
252 candidates.push_back( { s, aIdx, bIdx } );
253 }
254 }
255
256 std::sort( candidates.begin(), candidates.end(),
257 []( const Candidate& aL, const Candidate& aR )
258 {
259 if( aL.score != aR.score )
260 return aL.score > aR.score;
261
262 if( aL.aIdx != aR.aIdx )
263 return aL.aIdx < aR.aIdx;
264
265 return aL.bIdx < aR.bIdx;
266 } );
267
268 std::set<std::size_t> usedA;
269 std::set<std::size_t> usedB;
270
271 for( const Candidate& c : candidates )
272 {
273 if( usedA.count( c.aIdx ) || usedB.count( c.bIdx ) )
274 continue;
275
276 result.aToB[aA[c.aIdx].id] = aB[c.bIdx].id;
277 result.bToA[aB[c.bIdx].id] = aA[c.aIdx].id;
278 usedA.insert( c.aIdx );
279 usedB.insert( c.bIdx );
280 matchedB.insert( c.bIdx );
281 ++result.similarityMatches;
282 }
283 }
284
285 // Anything still unmatched goes into aOnly / bOnly.
286 for( const auto& [idA, indexA] : aIndex )
287 {
288 if( result.aToB.find( idA ) == result.aToB.end() )
289 result.aOnly.insert( idA );
290 }
291
292 for( const auto& [idB, indexB] : bIndex )
293 {
294 if( result.bToA.find( idB ) == result.bToA.end() )
295 result.bOnly.insert( idB );
296 }
297
298 return result;
299}
300
301} // namespace KICAD_DIFF
const char * name
BOX2< VECTOR2I > BOX2I
Definition box2.h:918
constexpr const Vec GetEnd() const
Definition box2.h:208
constexpr const Vec & GetOrigin() const
Definition box2.h:206
RECONCILIATION Reconcile(const std::vector< ITEM_DESCRIPTOR > &aA, const std::vector< ITEM_DESCRIPTOR > &aB) const
double ScoreSimilarity(const ITEM_DESCRIPTOR &aA, const ITEM_DESCRIPTOR &aB) const
Compute the similarity score between two items of the same type.
EDA_ANGLE abs(const EDA_ANGLE &aAngle)
Definition eda_angle.h:400
Descriptor used by the identity reconciler to compare items across two documents.
std::vector< std::pair< wxString, std::string > > keyProps
Maps every item in document A to either a peer in document B or to "only-in-A", and vice versa.
wxString result
Test unit parsing edge cases and error handling.
VECTOR2< int32_t > VECTOR2I
Definition vector2d.h:683